diff options
Diffstat (limited to 'arch/x86')
105 files changed, 1322 insertions, 976 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b72777ff32a..46c3bff3ced 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -10,6 +10,7 @@ config X86_32 def_bool y depends on !64BIT select CLKSRC_I8253 + select HAVE_UID16 config X86_64 def_bool y @@ -46,6 +47,7 @@ config X86 select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_SYSCALL_TRACEPOINTS + select SYSCTL_EXCEPTION_TRACE select HAVE_KVM select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK @@ -65,6 +67,7 @@ config X86 select HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_DEBUG_KMEMLEAK select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 @@ -85,6 +88,7 @@ config X86 select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if X86_64 + select HAVE_ARCH_TRANSPARENT_HUGEPAGE select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP @@ -104,6 +108,10 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_RCU_USER_QS if X86_64 select HAVE_IRQ_TIME_ACCOUNTING + select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE + select MODULES_USE_ELF_REL if X86_32 + select MODULES_USE_ELF_RELA if X86_64 config INSTRUCTION_DECODER def_bool y @@ -2168,6 +2176,7 @@ config IA32_EMULATION bool "IA32 Emulation" depends on X86_64 select COMPAT_BINFMT_ELF + select HAVE_UID16 ---help--- Include code to run legacy 32-bit programs under a 64-bit kernel. 
You should likely turn this on, unless you're diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 474ca35b1bc..05afcca66de 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -92,7 +92,7 @@ endif ifdef CONFIG_X86_X32 x32_ld_ok := $(call try-run,\ /bin/echo -e '1: .quad 1b' | \ - $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \ + $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \ $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \ $(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n) ifeq ($(x32_ld_ok),y) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index ce03476d8c8..ccce0ed67dd 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -37,7 +37,8 @@ setup-y += video-bios.o targets += $(setup-y) hostprogs-y := mkcpustr tools/build -HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \ +HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ + -include include/generated/autoconf.h \ -D__EXPORTED_HEADERS__ $(obj)/cpu.o: $(obj)/cpustr.h diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 7c04d0da709..1b9c22bea8a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -515,6 +515,11 @@ static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, } +static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) +{ + aesni_enc(ctx, out, in); +} + static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { @@ -525,7 +530,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, .tbuflen = sizeof(buf), .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), - .tweak_fn = XTS_TWEAK_CAST(aesni_enc), + .tweak_fn = aesni_xts_tweak, .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), .crypt_fn = lrw_xts_encrypt_callback, }; @@ -550,7 +555,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, .tbuflen = sizeof(buf), .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), - 
.tweak_fn = XTS_TWEAK_CAST(aesni_enc), + .tweak_fn = aesni_xts_tweak, .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), .crypt_fn = lrw_xts_decrypt_callback, }; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 9c289504e68..076745fc804 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -465,7 +465,7 @@ GLOBAL(\label) PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx - PTREGSCALL stub32_execve, sys32_execve, %rcx + PTREGSCALL stub32_execve, compat_sys_execve, %rcx PTREGSCALL stub32_fork, sys_fork, %rdi PTREGSCALL stub32_clone, sys32_clone, %rdx PTREGSCALL stub32_vfork, sys_vfork, %rdi diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index c5b938d92ea..86d68d1c880 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -385,21 +385,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return ret; } -asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(name); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - return error; - error = compat_do_execve(filename, argv, envp, regs); - putname(filename); - return error; -} - asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs *regs) { diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 1595d681343..66e5f0ef052 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -22,3 +22,9 @@ header-y += sigcontext32.h header-y += ucontext.h header-y += vm86.h header-y += vsyscall.h + +genhdr-y += unistd_32.h +genhdr-y += unistd_64.h +genhdr-y += unistd_x32.h + +generic-y += clkdev.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f34261296ff..33880342223 100644 --- a/arch/x86/include/asm/apic.h +++ 
b/arch/x86/include/asm/apic.h @@ -409,7 +409,7 @@ extern struct apic *apic; * to enforce the order with in them. */ #define apic_driver(sym) \ - static struct apic *__apicdrivers_##sym __used \ + static const struct apic *__apicdrivers_##sym __used \ __aligned(sizeof(struct apic *)) \ __section(.apicdrivers) = { &sym } diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 250b8774c15..b6c3b821acf 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -240,30 +240,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } - -/* - * atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ -static inline int atomic_dec_if_positive(atomic_t *v) -{ - int c, old, dec; - c = atomic_read(v); - for (;;) { - dec = c - 1; - if (unlikely(dec < 0)) - break; - old = atomic_cmpxchg((v), c, dec); - if (likely(old == c)) - break; - c = old; - } - return dec; -} - /** * atomic_inc_short - increment of a short integer * @v: pointer to type int diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index fedf32b73e6..59c6c401f79 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -41,6 +41,7 @@ typedef s64 __attribute__((aligned(4))) compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; typedef u64 __attribute__((aligned(4))) compat_u64; +typedef u32 compat_uptr_t; struct compat_timespec { compat_time_t tv_sec; @@ -124,6 +125,78 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; +typedef union compat_sigval { + compat_int_t sival_int; + compat_uptr_t sival_ptr; +} compat_sigval_t; + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[128/sizeof(int) - 3]; + + /* kill() */ + struct { + unsigned int _pid; 
/* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGCHLD (x32 version) */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_s64 _utime; + compat_s64 _stime; + } _sigchld_x32; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + + struct { + unsigned int _call_addr; /* calling insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; + } _sifields; +} compat_siginfo_t; + #define COMPAT_OFF_T_MAX 0x7fffffff #define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL @@ -209,7 +282,6 @@ typedef struct user_regs_struct32 compat_elf_gregset_t; * as pointers because the syscall entry code will have * appropriately converted them already. 
*/ -typedef u32 compat_uptr_t; static inline void __user *compat_ptr(compat_uptr_t uptr) { diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c9dcc181d4d..6e8fdf5ad11 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -35,7 +35,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ efi_call_virt(f, a1, a2, a3, a4, a5, a6) -#define efi_ioremap(addr, size, type) ioremap_cache(addr, size) +#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) #else /* !CONFIG_X86_32 */ @@ -89,7 +89,7 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, - u32 type); + u32 type, u64 attribute); #endif /* CONFIG_X86_32 */ @@ -98,6 +98,8 @@ extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern int efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); +extern void efi_unmap_memmap(void); +extern void efi_memory_uc(u64 addr, unsigned long size); #ifndef CONFIG_EFI /* diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 439a9acc132..bdd35dbd060 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -90,4 +90,8 @@ static inline void arch_release_hugepage(struct page *page) { } +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index b04cbdb138c..e6232773ce4 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -86,73 +86,6 @@ struct stat64 { unsigned long long st_ino; } __attribute__((packed)); -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[((128 / sizeof(int)) - 3)]; - - /* kill() */ - struct { 
- unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - int _overrun_incr; /* amount to add to overrun */ - } _timer; - - /* POSIX.1b signals */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGCHLD (x32 version) */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_s64 _utime; - compat_s64 _stime; - } _sigchld_x32; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - unsigned int _addr; /* faulting insn/memory ref. */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - struct { - unsigned int _call_addr; /* calling insn */ - int _syscall; /* triggering system call number */ - unsigned int _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define IA32_STACK_TOP IA32_PAGE_OFFSET #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fbee9714d9a..7f0edceb756 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,6 +121,11 @@ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + /* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. 
*/ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index fc994846529..a1f780d45f7 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) static inline int pmd_large(pmd_t pte) { - return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); + return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte) static inline int pmd_present(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_PRESENT; + /* + * Checking for _PAGE_PSE is needed too because + * split_huge_page will temporarily clear the present bit (but + * the _PAGE_PSE flag will remain set at all times while the + * _PAGE_PRESENT bit is clear). + */ + return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } static inline int pmd_none(pmd_t pmd) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 0c92113c4cb..8faa215a503 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -71,6 +71,7 @@ do { \ * tables contain all the necessary information. 
*/ #define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 8251be02301..47356f9df82 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -143,6 +143,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_unmap(pte) ((void)(pte))/* NOP */ #define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b98c0d958eb..ad1fc851167 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -588,11 +588,6 @@ typedef struct { } mm_segment_t; -/* - * create a kernel thread without removing it from tasklists - */ -extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); - /* Free all resources held by a thread. 
*/ extern void release_thread(struct task_struct *); diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 4ca1c611b55..a9a8cf3da49 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -54,8 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *, - compat_uptr_t __user *, struct pt_regs *); asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); long sys32_lseek(unsigned int, int, unsigned int); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index f1d8b441fc7..2be0b880417 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -25,7 +25,7 @@ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); long sys_execve(const char __user *, const char __user *const __user *, - const char __user *const __user *, struct pt_regs *); + const char __user *const __user *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index c535d847e3b..2d946e63ee8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,7 +79,6 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -105,7 +104,6 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define 
_TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 0d9776e9e2d..16f3fc6ebf2 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -50,6 +50,7 @@ # define __ARCH_WANT_SYS_TIME # define __ARCH_WANT_SYS_UTIME # define __ARCH_WANT_SYS_WAITPID +# define __ARCH_WANT_SYS_EXECVE /* * "Conditional" syscalls diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 8b38be2de9e..46e24d36b7d 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -17,8 +17,8 @@ struct vsyscall_gtod_data { /* open coded 'struct timespec' */ time_t wall_time_sec; - u32 wall_time_nsec; - u32 monotonic_time_nsec; + u64 wall_time_snsec; + u64 monotonic_time_snsec; time_t monotonic_time_sec; struct timezone sys_tz; diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 1707cfa928f..54d52ff1304 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -51,13 +51,14 @@ * with Xen so that on ARM we can have one ABI that works for 32 and 64 * bit guests. */ typedef unsigned long xen_pfn_t; +#define PRI_xen_pfn "lx" +typedef unsigned long xen_ulong_t; +#define PRI_xen_ulong "lx" /* Guest handles for primitive C types. 
*/ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); -__DEFINE_GUEST_HANDLE(ulong, unsigned long); DEFINE_GUEST_HANDLE(char); DEFINE_GUEST_HANDLE(int); -DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a48ea05157d..91ce48f05f9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o -obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o +obj-$(CONFIG_X86_32) += i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o obj-$(CONFIG_X86_64) += vsyscall_64.o diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index bc552cff257..a65829ac2b9 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -30,7 +30,7 @@ static int numachip_system __read_mostly; -static struct apic apic_numachip __read_mostly; +static const struct apic apic_numachip __read_mostly; static unsigned int get_apic_id(unsigned long x) { @@ -199,7 +199,7 @@ static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static struct apic apic_numachip __refconst = { +static const struct apic apic_numachip __refconst = { .name = "NumaConnect system", .probe = numachip_probe, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c265593ec2c..1817fa91102 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2257,6 +2257,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; cfg = irq_cfg(irq); + if (!cfg) + continue; + raw_spin_lock(&desc->lock); /* diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 
68de2dc962e..28610822fb3 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -69,4 +69,7 @@ void common(void) { OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); OFFSET(BP_pref_address, boot_params, hdr.pref_address); OFFSET(BP_code32_start, boot_params, hdr.code32_start); + + BLANK(); + DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d30a6a9a012..a0e067d3d96 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o endif diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9a7c90d80bc..93c5451bdd5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -991,7 +991,7 @@ static struct attribute ** __cpuinit amd_l3_attrs(void) if (attrs) return attrs; - n = sizeof (default_attrs) / sizeof (struct attribute *); + n = ARRAY_SIZE(default_attrs); if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) n += 2; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 29e87d3b284..46cbf868969 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2209,11 +2209,6 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = { &mce_cmci_disabled }; -static struct dev_ext_attribute dev_attr_bios_cmci_threshold = { - __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL), - &mce_bios_cmci_threshold -}; - static struct device_attribute *mce_device_attrs[] = { 
&dev_attr_tolerant.attr, &dev_attr_check_interval.attr, @@ -2222,7 +2217,6 @@ static struct device_attribute *mce_device_attrs[] = { &dev_attr_dont_log_ce.attr, &dev_attr_ignore_ce.attr, &dev_attr_cmci_disabled.attr, - &dev_attr_bios_cmci_threshold.attr, NULL }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index c4e916d7737..698b6ec12e0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -576,12 +576,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) int err = 0; if (shared_bank[bank]) { - nb = node_to_amd_nb(amd_get_nb_id(cpu)); - WARN_ON(!nb); /* threshold descriptor already initialized on this node? */ - if (nb->bank4) { + if (nb && nb->bank4) { /* yes, use it */ b = nb->bank4; err = kobject_add(b->kobj, &dev->kobj, name); @@ -615,8 +613,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) atomic_set(&b->cpus, 1); /* nb is already initialized, see above */ - WARN_ON(nb->bank4); - nb->bank4 = b; + if (nb) { + WARN_ON(nb->bank4); + nb->bank4 = b; + } } err = allocate_threshold_blocks(cpu, bank, 0, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 915b876edd1..4a3374e61a9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -208,12 +208,14 @@ static bool check_hw_exists(void) } /* - * Now write a value and read it back to see if it matches, - * this is needed to detect certain hardware emulators (qemu/kvm) - * that don't trap on the MSR access and always return 0s. + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. 
*/ - val = 0xabcdUL; reg = x86_pmu_event_addr(0); + if (rdmsrl_safe(reg, &val)) + goto msr_fail; + val ^= 0xffffUL; ret = wrmsrl_safe(reg, val); ret |= rdmsrl_safe(reg, &val_new); if (ret || val != val_new) @@ -338,6 +340,9 @@ int x86_setup_perfctr(struct perf_event *event) /* BTS is currently only allowed for user-mode. */ if (!attr->exclude_kernel) return -EOPNOTSUPP; + + if (!attr->exclude_guest) + return -EOPNOTSUPP; } hwc->config |= config; @@ -380,6 +385,9 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip) { int precise = 0; + if (!event->attr.exclude_guest) + return -EOPNOTSUPP; + /* Support for constant skid */ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { precise++; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8b6defe7eef..271d2570029 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -626,6 +626,8 @@ int p4_pmu_init(void); int p6_pmu_init(void); +int knc_pmu_init(void); + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index eebd5ffe1bb..6336bcbd061 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -41,17 +41,22 @@ struct cpu_perf_ibs { }; struct perf_ibs { - struct pmu pmu; - unsigned int msr; - u64 config_mask; - u64 cnt_mask; - u64 enable_mask; - u64 valid_mask; - u64 max_period; - unsigned long offset_mask[1]; - int offset_max; - struct cpu_perf_ibs __percpu *pcpu; - u64 (*get_count)(u64 config); + struct pmu pmu; + unsigned int msr; + u64 config_mask; + u64 cnt_mask; + u64 enable_mask; + u64 valid_mask; + u64 max_period; + unsigned long offset_mask[1]; + int offset_max; + struct cpu_perf_ibs __percpu *pcpu; + + struct attribute **format_attrs; + struct attribute_group format_group; + const struct attribute_group *attr_groups[2]; + + u64 (*get_count)(u64 config); 
}; struct perf_ibs_data { @@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags) static void perf_ibs_read(struct perf_event *event) { } +PMU_FORMAT_ATTR(rand_en, "config:57"); +PMU_FORMAT_ATTR(cnt_ctl, "config:19"); + +static struct attribute *ibs_fetch_format_attrs[] = { + &format_attr_rand_en.attr, + NULL, +}; + +static struct attribute *ibs_op_format_attrs[] = { + NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */ + NULL, +}; + static struct perf_ibs perf_ibs_fetch = { .pmu = { .task_ctx_nr = perf_invalid_context, @@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = { .max_period = IBS_FETCH_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, + .format_attrs = ibs_fetch_format_attrs, .get_count = get_ibs_fetch_count, }; @@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = { .max_period = IBS_OP_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, .offset_max = MSR_AMD64_IBSOP_REG_COUNT, + .format_attrs = ibs_op_format_attrs, .get_count = get_ibs_op_count, }; @@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) perf_ibs->pcpu = pcpu; + /* register attributes */ + if (perf_ibs->format_attrs[0]) { + memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group)); + perf_ibs->format_group.name = "format"; + perf_ibs->format_group.attrs = perf_ibs->format_attrs; + + memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups)); + perf_ibs->attr_groups[0] = &perf_ibs->format_group; + perf_ibs->pmu.attr_groups = perf_ibs->attr_groups; + } + ret = perf_pmu_register(&perf_ibs->pmu, name, -1); if (ret) { perf_ibs->pcpu = NULL; @@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) static __init int perf_event_ibs_init(void) { + struct attribute **attr = ibs_op_format_attrs; + if (!ibs_caps) return -ENODEV; /* ibs not supported by the cpu */ perf_ibs_pmu_init(&perf_ibs_fetch, 
"ibs_fetch"); - if (ibs_caps & IBS_CAPS_OPCNT) + + if (ibs_caps & IBS_CAPS_OPCNT) { perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; + *attr++ = &format_attr_cnt_ctl.attr; + } perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6bca492b854..324bb523d9d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void) switch (boot_cpu_data.x86) { case 0x6: return p6_pmu_init(); + case 0xb: + return knc_pmu_init(); case 0xf: return p4_pmu_init(); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 99d96a4978b..3cf3d97cce3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -118,22 +118,24 @@ static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; int box_ctl = uncore_pci_box_ctl(box); - u32 config; + u32 config = 0; - pci_read_config_dword(pdev, box_ctl, &config); - config |= SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config |= SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } } static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; int box_ctl = uncore_pci_box_ctl(box); - u32 config; + u32 config = 0; - pci_read_config_dword(pdev, box_ctl, &config); - config &= ~SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } } static void 
snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -156,7 +158,7 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; - u64 count; + u64 count = 0; pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); @@ -603,11 +605,12 @@ static struct pci_driver snbep_uncore_pci_driver = { /* * build pci bus to socket mapping */ -static void snbep_pci2phy_map_init(void) +static int snbep_pci2phy_map_init(void) { struct pci_dev *ubox_dev = NULL; int i, bus, nodeid; - u32 config; + int err = 0; + u32 config = 0; while (1) { /* find the UBOX device */ @@ -618,10 +621,14 @@ static void snbep_pci2phy_map_init(void) break; bus = ubox_dev->bus->number; /* get the Node ID of the local register */ - pci_read_config_dword(ubox_dev, 0x40, &config); + err = pci_read_config_dword(ubox_dev, 0x40, &config); + if (err) + break; nodeid = config; /* get the Node ID mapping */ - pci_read_config_dword(ubox_dev, 0x54, &config); + err = pci_read_config_dword(ubox_dev, 0x54, &config); + if (err) + break; /* * every three bits in the Node ID mapping register maps * to a particular node. @@ -633,7 +640,11 @@ static void snbep_pci2phy_map_init(void) } } }; - return; + + if (ubox_dev) + pci_dev_put(ubox_dev); + + return err ? 
pcibios_err_to_errno(err) : 0; } /* end of Sandy Bridge-EP uncore support */ @@ -1547,7 +1558,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - int port; /* adjust the main event selector and extra register index */ if (reg1->idx % 2) { @@ -1559,7 +1569,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) } /* adjust extra register config */ - port = reg1->idx / 6 + box->pmu->pmu_idx * 4; switch (reg1->idx % 6) { case 2: /* shift the 8~15 bits to the 0~7 bits */ @@ -2578,9 +2587,11 @@ static int __init uncore_pci_init(void) switch (boot_cpu_data.x86_model) { case 45: /* Sandy Bridge-EP */ + ret = snbep_pci2phy_map_init(); + if (ret) + return ret; pci_uncores = snbep_pci_uncores; uncore_pci_driver = &snbep_uncore_pci_driver; - snbep_pci2phy_map_init(); break; default: return 0; @@ -2926,6 +2937,9 @@ static int __init intel_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; + if (cpu_has_hypervisor) + return -ENODEV; + ret = uncore_pci_init(); if (ret) goto fail; diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c new file mode 100644 index 00000000000..4b7731bf23a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_knc.c @@ -0,0 +1,319 @@ +/* Driver for Intel Xeon Phi "Knights Corner" PMU */ + +#include <linux/perf_event.h> +#include <linux/types.h> + +#include <asm/hardirq.h> + +#include "perf_event.h" + +static const u64 knc_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, +}; + +static __initconst u64 knc_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + 
[PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + /* On Xeon Phi event "0" is a valid DATA_READ */ + /* (L1 Data Cache Reads) Instruction. */ + /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ + /* bit will always be set in x86_pmu_hw_config(). */ + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ + [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ + [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + /* see note on L1 OP_READ */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ 
C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ + [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + + +static u64 knc_pmu_event_map(int hw_event) +{ + return knc_perfmon_event_map[hw_event]; +} + +static struct event_constraint knc_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ + INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ + INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ + INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ + INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ + INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ + INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ + INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ + INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ + INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ + INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ + INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ + INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ + INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ + INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ + INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ + EVENT_CONSTRAINT_END +}; + +#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 
0x0000002d +#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e +#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f + +#define KNC_ENABLE_COUNTER0 0x00000001 +#define KNC_ENABLE_COUNTER1 0x00000002 + +static void knc_pmu_disable_all(void) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static void knc_pmu_enable_all(int added) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static inline void +knc_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +static void knc_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +static inline u64 knc_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void knc_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); +} + +static int knc_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int handled = 0; + int bit, loops; + u64 status; + + cpuc = &__get_cpu_var(cpu_hw_events); + + knc_pmu_disable_all(); + + status = knc_pmu_get_status(); + if (!status) { + knc_pmu_enable_all(0); + return handled; + } + + loops = 0; +again: + knc_pmu_ack_status(status); + if (++loops > 100) { + WARN_ONCE(1, "perf: irq loop stuck!\n"); + perf_event_print_debug(); + goto done; + } + + inc_irq_stat(apic_perf_irqs); + + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = 
cpuc->events[bit]; + + handled++; + + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + /* + * Repeat if there is more work to be done: + */ + status = knc_pmu_get_status(); + if (status) + goto again; + +done: + knc_pmu_enable_all(0); + + return handled; +} + + +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_knc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static __initconst struct x86_pmu knc_pmu = { + .name = "knc", + .handle_irq = knc_pmu_handle_irq, + .disable_all = knc_pmu_disable_all, + .enable_all = knc_pmu_enable_all, + .enable = knc_pmu_enable_event, + .disable = knc_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_KNC_EVNTSEL0, + .perfctr = MSR_KNC_PERFCTR0, + .event_map = knc_pmu_event_map, + .max_events = ARRAY_SIZE(knc_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 39) - 1, + .version = 0, + .num_counters = 2, + .cntval_bits = 40, + .cntval_mask = (1ULL << 40) - 1, + .get_event_constraints = x86_get_event_constraints, + .event_constraints = knc_event_constraints, + .format_attrs = intel_knc_formats_attr, +}; + +__init int knc_pmu_init(void) +{ + x86_pmu = knc_pmu; + + memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index e4dd0f7a045..7d0270bd793 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -8,13 
+8,106 @@ */ static const u64 p6_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */ + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */ + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */ + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */ + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */ + +}; + +static __initconst u64 p6_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { 
+ [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */ + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, }; static u64 p6_pmu_event_map(int hw_event) @@ -34,7 +127,7 @@ static struct event_constraint p6_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ @@ -64,25 +157,25 @@ static void p6_pmu_enable_all(int added) static inline void p6_pmu_disable_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; u64 val = P6_NOP_EVENT; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)wrmsrl_safe(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; u64 val; val = hwc->config; - if (cpuc->enabled) - val |= 
ARCH_PERFMON_EVENTSEL_ENABLE; + + /* + * p6 only has a global event enable, set on PerfEvtSel0 + * We "disable" events by programming P6_NOP_EVENT + * and we rely on p6_pmu_enable_all() being called + * to actually enable the events. + */ (void)wrmsrl_safe(hwc->config_base, val); } @@ -158,5 +251,9 @@ __init int p6_pmu_init(void) x86_pmu = p6_pmu; + memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 966512b2cac..2e8caf03f59 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) switch (boot_cpu_data.x86) { case 6: return msr - MSR_P6_PERFCTR0; + case 11: + return msr - MSR_KNC_PERFCTR0; case 15: return msr - MSR_P4_BPU_PERFCTR0; } @@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) switch (boot_cpu_data.x86) { case 6: return msr - MSR_P6_EVNTSEL0; + case 11: + return msr - MSR_KNC_EVNTSEL0; case 15: return msr - MSR_P4_BSU_ESCR0; } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ed858e9e9a7..df06ade26be 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1077,6 +1077,9 @@ void __init memblock_x86_fill(void) memblock_add(ei->addr, ei->size); } + /* throw away partial pages */ + memblock_trim_memory(PAGE_SIZE); + memblock_dump_all(); } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 0750e3ba87c..88b725aa1d5 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -299,6 +299,21 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) +ENTRY(ret_from_kernel_thread) + CFI_STARTPROC + pushl_cfi %eax + call schedule_tail + GET_THREAD_INFO(%ebp) + popl_cfi %eax + pushl_cfi $0x0202 # Reset kernel eflags + popfl_cfi + movl PT_EBP(%esp),%eax + call *PT_EBX(%esp) + movl $0,PT_EAX(%esp) + jmp 
syscall_exit + CFI_ENDPROC +ENDPROC(ret_from_kernel_thread) + /* * Interrupt exit functions should be protected against kprobes */ @@ -323,8 +338,7 @@ ret_from_intr: andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax #else /* - * We can be coming here from a syscall done in the kernel space, - * e.g. a failed kernel_execve(). + * We can be coming here from child spawned by kernel_thread(). */ movl PT_CS(%esp), %eax andl $SEGMENT_RPL_MASK, %eax @@ -616,6 +630,10 @@ work_notifysig: # deal with pending signals and movl %esp, %eax jne work_notifysig_v86 # returning to kernel-space or # vm86-space +1: +#else + movl %esp, %eax +#endif TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) movb PT_CS(%esp), %bl @@ -626,24 +644,15 @@ work_notifysig: # deal with pending signals and call do_notify_resume jmp resume_userspace +#ifdef CONFIG_VM86 ALIGN work_notifysig_v86: pushl_cfi %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer popl_cfi %ecx movl %eax, %esp -#else - movl %esp, %eax + jmp 1b #endif - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - movb PT_CS(%esp), %bl - andb $SEGMENT_RPL_MASK, %bl - cmpb $USER_RPL, %bl - jb resume_kernel - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace END(work_pending) # perform syscall exit tracing @@ -732,7 +741,6 @@ ENDPROC(ptregs_##name) PTREGSCALL1(iopl) PTREGSCALL0(fork) PTREGSCALL0(vfork) -PTREGSCALL3(execve) PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) @@ -1015,16 +1023,6 @@ END(spurious_interrupt_bug) */ .popsection -ENTRY(kernel_thread_helper) - pushl $0 # fake return address for unwinder - CFI_STARTPROC - movl %edi,%eax - call *%esi - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -ENDPROC(kernel_thread_helper) - #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. 
*/ @@ -1037,7 +1035,7 @@ ENTRY(xen_sysenter_target) ENTRY(xen_hypervisor_callback) CFI_STARTPROC - pushl_cfi $0 + pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL TRACE_IRQS_OFF @@ -1079,14 +1077,16 @@ ENTRY(xen_failsafe_callback) 2: mov 8(%esp),%es 3: mov 12(%esp),%fs 4: mov 16(%esp),%gs + /* EAX == 0 => Category 1 (Bad segment) + EAX != 0 => Category 2 (Bad IRET) */ testl %eax,%eax popl_cfi %eax lea 16(%esp),%esp CFI_ADJUST_CFA_OFFSET -16 jz 5f addl $16,%esp - jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) -5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) + jmp iret_exc +5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp ret_from_exception CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 44531acd9a8..b51b2c7ee51 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -554,7 +554,7 @@ ENTRY(ret_from_fork) RESTORE_REST testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? - jz retint_restore_args + jz 1f testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET jnz int_ret_from_sys_call @@ -562,6 +562,14 @@ ENTRY(ret_from_fork) RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET jmp ret_from_sys_call # go to the SYSRET fastpath +1: + subq $REST_SKIP, %rsp # leave space for volatiles + CFI_ADJUST_CFA_OFFSET REST_SKIP + movq %rbp, %rdi + call *%rbx + movl $0, RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call CFI_ENDPROC END(ret_from_fork) @@ -862,7 +870,6 @@ ENTRY(stub_execve) PARTIAL_FRAME 0 SAVE_REST FIXUP_TOP_OF_STACK %r11 - movq %rsp, %rcx call sys_execve RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) @@ -912,8 +919,7 @@ ENTRY(stub_x32_execve) PARTIAL_FRAME 0 SAVE_REST FIXUP_TOP_OF_STACK %r11 - movq %rsp, %rcx - call sys32_execve + call compat_sys_execve RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) RESTORE_REST @@ -1318,52 +1324,6 @@ bad_gs: jmp 2b .previous -ENTRY(kernel_thread_helper) - pushq $0 # fake return address - CFI_STARTPROC - /* - * Here we are in the child and 
the registers are set as they were - * at kernel_thread() invocation in the parent. - */ - call *%rsi - # exit - mov %eax, %edi - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -END(kernel_thread_helper) - -/* - * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. - * - * C extern interface: - * extern long execve(const char *name, char **argv, char **envp) - * - * asm input arguments: - * rdi: name, rsi: argv, rdx: envp - * - * We want to fallback into: - * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) - * - * do_sys_execve asm fallback arguments: - * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack - */ -ENTRY(kernel_execve) - CFI_STARTPROC - FAKE_STACK_FRAME $0 - SAVE_ALL - movq %rsp,%rcx - call sys_execve - movq %rax, RAX(%rsp) - RESTORE_REST - testq %rax,%rax - je int_ret_from_sys_call - RESTORE_ARGS - UNFAKE_STACK_FRAME - ret - CFI_ENDPROC -END(kernel_execve) - /* Call softirq on interrupt stack. Interrupts are off. 
*/ ENTRY(call_softirq) CFI_STARTPROC @@ -1475,7 +1435,7 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 + pushq_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp error_exit CFI_ENDPROC diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 3f61904365c..836f8322960 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; +#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; +#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b3e5e51bc90..4180a874c76 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -247,7 +247,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ + rcu_irq_enter(); + exit_idle(); kvm_async_pf_task_wait((u32)read_cr2()); + rcu_irq_exit(); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index dc3567e083f..b644e1c765d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -293,71 +293,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, } /* - * This gets run with %si containing the - * function to call, and %di containing - * the "args". 
- */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.si = (unsigned long) fn; - regs.di = (unsigned long) arg; - -#ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; - regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; -#else - regs.ss = __KERNEL_DS; -#endif - - regs.orig_ax = -1; - regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - -/* - * sys_execve() executes a new program. - */ -long sys_execve(const char __user *name, - const char __user *const __user *argv, - const char __user *const __user *envp, struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(name); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - return error; - error = do_execve(filename, argv, envp, regs); - -#ifdef CONFIG_X86_32 - if (error == 0) { - /* Make sure we don't return using sysenter.. */ - set_thread_flag(TIF_IRET); - } -#endif - - putname(filename); - return error; -} - -/* * Idle related variables and functions */ unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b9ff83c7135..44e0bff38e7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,6 +57,7 @@ #include <asm/switch_to.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); /* * Return saved PC of a blocked thread. 
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task) } int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long unused, + unsigned long arg, struct task_struct *p, struct pt_regs *regs) { - struct pt_regs *childregs; + struct pt_regs *childregs = task_pt_regs(p); struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + p->thread.sp = (unsigned long) childregs; + p->thread.sp0 = (unsigned long) (childregs+1); + + if (unlikely(!regs)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); + p->thread.ip = (unsigned long) ret_from_kernel_thread; + task_user_gs(p) = __KERNEL_STACK_CANARY; + childregs->ds = __USER_DS; + childregs->es = __USER_DS; + childregs->fs = __KERNEL_PERCPU; + childregs->bx = sp; /* function */ + childregs->bp = arg; + childregs->orig_ax = -1; + childregs->cs = __KERNEL_CS | get_kernel_rpl(); + childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; + p->fpu_counter = 0; + p->thread.io_bitmap_ptr = NULL; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + return 0; + } *childregs = *regs; childregs->ax = 0; childregs->sp = sp; - p->thread.sp = (unsigned long) childregs; - p->thread.sp0 = (unsigned long) (childregs+1); - p->thread.ip = (unsigned long) ret_from_fork; - task_user_gs(p) = get_user_gs(regs); p->fpu_counter = 0; @@ -190,6 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; + regs->flags = X86_EFLAGS_IF; + /* + * force it to the iret return path by making it look as if there was + * some work pending. 
+ */ + set_thread_flag(TIF_NOTIFY_RESUME); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8a6d20ce197..16c6365e2b8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls) } int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long unused, + unsigned long arg, struct task_struct *p, struct pt_regs *regs) { int err; struct pt_regs *childregs; struct task_struct *me = current; - childregs = ((struct pt_regs *) - (THREAD_SIZE + task_stack_page(p))) - 1; - *childregs = *regs; - - childregs->ax = 0; - if (user_mode(regs)) - childregs->sp = sp; - else - childregs->sp = (unsigned long)childregs; - + p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; + childregs = task_pt_regs(p); p->thread.sp = (unsigned long) childregs; - p->thread.sp0 = (unsigned long) (childregs+1); p->thread.usersp = me->thread.usersp; - set_tsk_thread_flag(p, TIF_FORK); - p->fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; @@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = p->thread.fsindex ? 
0 : me->thread.fs; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + + if (unlikely(!regs)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); + childregs->sp = (unsigned long)childregs; + childregs->ss = __KERNEL_DS; + childregs->bx = sp; /* function */ + childregs->bp = arg; + childregs->orig_ax = -1; + childregs->cs = __KERNEL_CS | get_kernel_rpl(); + childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; + return 0; + } + *childregs = *regs; + + childregs->ax = 0; + childregs->sp = sp; err = -ENOMEM; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 52190a938b4..4e8ba39eaf0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -358,14 +358,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), }, }, - { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */ - .callback = set_bios_reboot, - .ident = "CompuLab SBC-FITPC2", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"), - DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), - }, - }, { /* Handle problems with rebooting on ASUS P4S800 */ .callback = set_bios_reboot, .ident = "ASUS P4S800", diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index af6db6ec5b2..4929c1be0ac 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -225,7 +225,7 @@ static struct platform_device rtc_device = { static __init int add_rtc_cmos(void) { #ifdef CONFIG_PNP - static const char *ids[] __initconst = + static const char * const const ids[] __initconst = { "PNP0b00", "PNP0b01", "PNP0b02", }; struct pnp_dev *dev; struct pnp_id *id; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d609be046b5..ca45696f30f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -68,6 +68,7 @@ #include <linux/percpu.h> #include <linux/crash_dump.h> 
#include <linux/tboot.h> +#include <linux/jiffies.h> #include <video/edid.h> @@ -919,8 +920,22 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { - max_pfn_mapped = init_memory_mapping(1UL<<32, - max_pfn<<PAGE_SHIFT); + int i; + unsigned long start, end; + unsigned long start_pfn, end_pfn; + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, + NULL) { + + end = PFN_PHYS(end_pfn); + if (end <= (1UL<<32)) + continue; + + start = PFN_PHYS(start_pfn); + max_pfn_mapped = init_memory_mapping( + max((1UL<<32), start), end); + } + /* can we preseve max_low_pfn ?*/ max_low_pfn = max_pfn; } @@ -1032,6 +1047,20 @@ void __init setup_arch(char **cmdline_p) mcheck_init(); arch_init_ideal_nops(); + + register_refined_jiffies(CLOCK_TICK_RATE); + +#ifdef CONFIG_EFI + /* Once setup is done above, disable efi_enabled on mismatched + * firmware/kernel architectures since there is no support for + * runtime services. + */ + if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + efi_enabled = 0; + } +#endif } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b33144c8b30..70b27ee6118 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -824,10 +824,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ - if (thread_info_flags & _TIF_UPROBE) { - clear_thread_flag(TIF_UPROBE); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); - } /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) @@ -840,10 +838,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); -#ifdef CONFIG_X86_32 - clear_thread_flag(TIF_IRET); -#endif /* CONFIG_X86_32 */ - 
rcu_user_enter(); } diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c deleted file mode 100644 index 0b0cb5fede1..00000000000 --- a/arch/x86/kernel/sys_i386_32.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * This file contains various random system calls that - * have a non-standard calling sequence on the Linux/i386 - * platform. - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/stat.h> -#include <linux/syscalls.h> -#include <linux/mman.h> -#include <linux/file.h> -#include <linux/utsname.h> -#include <linux/ipc.h> - -#include <linux/uaccess.h> -#include <linux/unistd.h> - -#include <asm/syscalls.h> - -/* - * Do a system call from kernel instead of calling sys_execve so we - * end up with proper pt_regs. - */ -int kernel_execve(const char *filename, - const char *const argv[], - const char *const envp[]) -{ - long __res; - asm volatile ("int $0x80" - : "=a" (__res) - : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory"); - return __res; -} diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 9538f00827a..aafa5557b39 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -651,31 +651,19 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) /* * Skip these instructions as per the currently known x86 ISA. 
- * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } + * rep=0x66*; nop=0x90 */ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; for (i = 0; i < MAX_UINSN_BYTES; i++) { - if ((auprobe->insn[i] == 0x66)) + if (auprobe->insn[i] == 0x66) continue; if (auprobe->insn[i] == 0x90) return true; - if (i == (MAX_UINSN_BYTES - 1)) - break; - - if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f)) - return true; - - if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19)) - return true; - - if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0)) - return true; - break; } return false; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 54abcc0baf2..5c9687b1bde 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) if ((trapno == 3) || (trapno == 1)) { KVM86->regs32->ax = VM86_TRAP + (trapno << 8); /* setting this flag forces the code in entry_32.S to - call save_v86_state() and change the stack pointer - to KVM86->regs32 */ - set_thread_flag(TIF_IRET); + the path where we call save_v86_state() and change + the stack pointer to KVM86->regs32 */ + set_thread_flag(TIF_NOTIFY_RESUME); return 0; } do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8d141b30904..3a3e8c9e280 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -28,7 +28,7 @@ #include <linux/jiffies.h> #include <linux/sysctl.h> #include <linux/topology.h> -#include <linux/clocksource.h> +#include <linux/timekeeper_internal.h> #include <linux/getcpu.h> #include <linux/cpu.h> #include <linux/smp.h> @@ -82,32 +82,41 @@ void update_vsyscall_tz(void) vsyscall_gtod_data.sys_tz = sys_tz; } -void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 
mult) +void update_vsyscall(struct timekeeper *tk) { - struct timespec monotonic; + struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - write_seqcount_begin(&vsyscall_gtod_data.seq); + write_seqcount_begin(&vdata->seq); /* copy vsyscall data */ - vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; - vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; - vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = mult; - vsyscall_gtod_data.clock.shift = clock->shift; - - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.mask = tk->clock->mask; + vdata->clock.mult = tk->mult; + vdata->clock.shift = tk->shift; + + vdata->wall_time_sec = tk->xtime_sec; + vdata->wall_time_snsec = tk->xtime_nsec; + + vdata->monotonic_time_sec = tk->xtime_sec + + tk->wall_to_monotonic.tv_sec; + vdata->monotonic_time_snsec = tk->xtime_nsec + + (tk->wall_to_monotonic.tv_nsec + << tk->shift); + while (vdata->monotonic_time_snsec >= + (((u64)NSEC_PER_SEC) << tk->shift)) { + vdata->monotonic_time_snsec -= + ((u64)NSEC_PER_SEC) << tk->shift; + vdata->monotonic_time_sec++; + } - monotonic = timespec_add(*wall_time, *wtm); - vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec; - vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec; + vdata->wall_time_coarse.tv_sec = tk->xtime_sec; + vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); - vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); - vsyscall_gtod_data.monotonic_time_coarse = - timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm); + vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, + tk->wall_to_monotonic); - write_seqcount_end(&vsyscall_gtod_data.seq); + write_seqcount_end(&vdata->seq); } static void warn_bad_vsyscall(const char *level, struct 
pt_regs *regs, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c6e6b721b6e..43e9fadca5d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1311,7 +1311,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) vcpu->arch.apic_base = value; if (apic_x2apic_mode(apic)) { u32 id = kvm_apic_id(apic); - u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); + u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); kvm_apic_set_ldr(apic, ldr); } apic->base_address = apic->vcpu->arch.apic_base & diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6f78fa3a470..aabb1289ff0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2497,8 +2497,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } } - if (!is_error_pfn(pfn)) - kvm_release_pfn_clean(pfn); + kvm_release_pfn_clean(pfn); } static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 6e121a2a49e..7872a3330fb 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -4,7 +4,6 @@ config LGUEST_GUEST depends on X86_32 select VIRTUALIZATION select VIRTIO - select VIRTIO_RING select VIRTIO_CONSOLE help Lguest is a tiny in-kernel hypervisor. Selecting this will diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a530b230e7d..8e13ecb41be 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1220,6 +1220,7 @@ good_area: /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. 
*/ flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index b91e4851242..937bff5cdaa 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) struct address_space *mapping = vma->vm_file->f_mapping; pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - struct prio_tree_iter iter; struct vm_area_struct *svma; unsigned long saddr; pte_t *spte = NULL; @@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return (pte_t *)pmd_alloc(mm, pud, addr); mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ab1f6a93b52..d7aea41563b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -35,40 +35,44 @@ struct map_range { unsigned page_size_mask; }; -static void __init find_early_table_space(struct map_range *mr, unsigned long end, - int use_pse, int use_gbpages) +/* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. 
+ */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) { - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; phys_addr_t base; - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - if (use_pse) { - unsigned long extra; + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; + } - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); #ifdef CONFIG_X86_32 - extra += PMD_SIZE; + extra += PMD_SIZE; #endif - /* The first 2/4M doesn't use large pages. 
*/ - if (mr->start < PMD_SIZE) - extra += mr->end - mr->start; - - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); #ifdef CONFIG_X86_32 @@ -86,7 +90,7 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", - end - 1, pgt_buf_start << PAGE_SHIFT, + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, (pgt_buf_top << PAGE_SHIFT) - 1); } @@ -267,7 +271,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(&mr[0], end, use_pse, use_gbpages); + find_early_table_space(mr, nr_range); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2b6b4a3c8be..3baff255ada 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -386,7 +386,8 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * these mappings are more intelligent. */ if (pte_val(*pte)) { - pages++; + if (!after_bootmem) + pages++; continue; } @@ -451,6 +452,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, * attributes. */ if (page_size_mask & (1 << PG_LEVEL_2M)) { + if (!after_bootmem) + pages++; last_map_addr = next; continue; } @@ -526,6 +529,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, * attributes. 
*/ if (page_size_mask & (1 << PG_LEVEL_1G)) { + if (!after_bootmem) + pages++; last_map_addr = next; continue; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 3d68ef6d226..0eb572eda40 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -664,20 +664,20 @@ static void free_pfn_range(u64 paddr, unsigned long size) } /* - * track_pfn_vma_copy is called when vma that is covering the pfnmap gets + * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). * * If the vma has a linear pfn mapping for the entire range, we get the prot * from pte and reserve the entire vma range with single reserve_pfn_range call. */ -int track_pfn_vma_copy(struct vm_area_struct *vma) +int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; - if (is_linear_pfn_mapping(vma)) { + if (vma->vm_flags & VM_PAT) { /* * reserve the whole chunk covered by vma. We need the * starting address and protection from pte. @@ -694,31 +694,59 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) } /* - * track_pfn_vma_new is called when a _new_ pfn mapping is being established - * for physical range indicated by pfn and size. - * * prot is passed in as a parameter for the new mapping. If the vma has a * linear pfn mapping for the entire range reserve the entire vma range with * single reserve_pfn_range call. 
*/ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long size) +int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, + unsigned long pfn, unsigned long addr, unsigned long size) { + resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; unsigned long flags; - resource_size_t paddr; - unsigned long vma_size = vma->vm_end - vma->vm_start; - if (is_linear_pfn_mapping(vma)) { - /* reserve the whole chunk starting from vm_pgoff */ - paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot, 0); + /* reserve the whole chunk starting from paddr */ + if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) { + int ret; + + ret = reserve_pfn_range(paddr, size, prot, 0); + if (!ret) + vma->vm_flags |= VM_PAT; + return ret; } if (!pat_enabled) return 0; - /* for vm_insert_pfn and friends, we set prot based on lookup */ - flags = lookup_memtype(pfn << PAGE_SHIFT); + /* + * For anything smaller than the vma size we set prot based on the + * lookup. + */ + flags = lookup_memtype(paddr); + + /* Check memtype for the remaining pages */ + while (size > PAGE_SIZE) { + size -= PAGE_SIZE; + paddr += PAGE_SIZE; + if (flags != lookup_memtype(paddr)) + return -EINVAL; + } + + *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + flags); + + return 0; +} + +int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, + unsigned long pfn) +{ + unsigned long flags; + + if (!pat_enabled) + return 0; + + /* Set prot based on lookup */ + flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT); *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | flags); @@ -726,22 +754,31 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, } /* - * untrack_pfn_vma is called while unmapping a pfnmap for a region. + * untrack_pfn is called while unmapping a pfnmap for a region. 
* untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case size can be zero). + * can be for the entire vma (in which case pfn, size are zero). */ -void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size) +void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size) { resource_size_t paddr; - unsigned long vma_size = vma->vm_end - vma->vm_start; + unsigned long prot; - if (is_linear_pfn_mapping(vma)) { - /* free the whole chunk starting from vm_pgoff */ - paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - free_pfn_range(paddr, vma_size); + if (!(vma->vm_flags & VM_PAT)) return; + + /* free the chunk starting from pfn or the whole chunk */ + paddr = (resource_size_t)pfn << PAGE_SHIFT; + if (!paddr && !size) { + if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { + WARN_ON_ONCE(1); + return; + } + + size = vma->vm_end - vma->vm_start; } + free_pfn_range(paddr, size); + vma->vm_flags &= ~VM_PAT; } pgprot_t pgprot_writecombine(pgprot_t prot) diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 8acaddd0fb2..415f6c4ced3 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -12,7 +12,7 @@ #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/rbtree.h> +#include <linux/rbtree_augmented.h> #include <linux/sched.h> #include <linux/gfp.h> @@ -54,29 +54,24 @@ static u64 get_subtree_max_end(struct rb_node *node) return ret; } -/* Update 'subtree_max_end' for a node, based on node and its children */ -static void memtype_rb_augment_cb(struct rb_node *node, void *__unused) +static u64 compute_subtree_max_end(struct memtype *data) { - struct memtype *data; - u64 max_end, child_max_end; - - if (!node) - return; + u64 max_end = data->end, child_max_end; - data = container_of(node, struct memtype, rb); - max_end = data->end; - - child_max_end = get_subtree_max_end(node->rb_right); 
+ child_max_end = get_subtree_max_end(data->rb.rb_right); if (child_max_end > max_end) max_end = child_max_end; - child_max_end = get_subtree_max_end(node->rb_left); + child_max_end = get_subtree_max_end(data->rb.rb_left); if (child_max_end > max_end) max_end = child_max_end; - data->subtree_max_end = max_end; + return max_end; } +RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb, + u64, subtree_max_end, compute_subtree_max_end) + /* Find the first (lowest start addr) overlapping range from rb tree */ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, u64 start, u64 end) @@ -179,15 +174,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) struct memtype *data = container_of(*node, struct memtype, rb); parent = *node; + if (data->subtree_max_end < newdata->end) + data->subtree_max_end = newdata->end; if (newdata->start <= data->start) node = &((*node)->rb_left); else if (newdata->start > data->start) node = &((*node)->rb_right); } + newdata->subtree_max_end = newdata->end; rb_link_node(&newdata->rb, parent, node); - rb_insert_color(&newdata->rb, root); - rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL); + rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb); } int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) @@ -209,16 +206,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) struct memtype *rbt_memtype_erase(u64 start, u64 end) { - struct rb_node *deepest; struct memtype *data; data = memtype_rb_exact_match(&memtype_rbroot, start, end); if (!data) goto out; - deepest = rb_augment_erase_begin(&data->rb); - rb_erase(&data->rb, &memtype_rbroot); - rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL); + rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb); out: return data; } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 26b8a8514ee..48768df2471 100644 --- 
a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, val |= counter_config->extra; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; - val |= (event & 0x0F00) << 24; + val |= (u64)(event & 0x0F00) << 24; return val; } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index aded2a91162..ad4439145f8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -70,11 +70,15 @@ EXPORT_SYMBOL(efi); struct efi_memory_map memmap; bool efi_64bit; -static bool efi_native; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static inline bool efi_is_native(void) +{ + return IS_ENABLED(CONFIG_X86_64) == efi_64bit; +} + static int __init setup_noefi(char *arg) { efi_enabled = 0; @@ -420,7 +424,7 @@ void __init efi_reserve_boot_services(void) } } -static void __init efi_unmap_memmap(void) +void __init efi_unmap_memmap(void) { if (memmap.map) { early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); @@ -432,7 +436,7 @@ void __init efi_free_boot_services(void) { void *p; - if (!efi_native) + if (!efi_is_native()) return; for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -684,12 +688,10 @@ void __init efi_init(void) return; } efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; - efi_native = !efi_64bit; #else efi_phys.systab = (efi_system_table_t *) (boot_params.efi_info.efi_systab | ((__u64)boot_params.efi_info.efi_systab_hi<<32)); - efi_native = efi_64bit; #endif if (efi_systab_init(efi_phys.systab)) { @@ -723,7 +725,7 @@ void __init efi_init(void) * that doesn't match the kernel 32/64-bit mode. 
*/ - if (!efi_native) + if (!efi_is_native()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else if (efi_runtime_init()) { efi_enabled = 0; @@ -735,7 +737,7 @@ void __init efi_init(void) return; } #ifdef CONFIG_X86_32 - if (efi_native) { + if (efi_is_native()) { x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; } @@ -810,6 +812,16 @@ void __iomem *efi_lookup_mapped_addr(u64 phys_addr) return NULL; } +void efi_memory_uc(u64 addr, unsigned long size) +{ + unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; + u64 npages; + + npages = round_up(size, page_shift) / page_shift; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); +} + /* * This function will switch the EFI runtime services to virtual mode. * Essentially, look through the EFI memmap and map every region that @@ -823,7 +835,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md, *prev_md = NULL; efi_status_t status; unsigned long size; - u64 end, systab, addr, npages, end_pfn; + u64 end, systab, end_pfn; void *p, *va, *new_memmap = NULL; int count = 0; @@ -834,7 +846,7 @@ void __init efi_enter_virtual_mode(void) * non-native EFI */ - if (!efi_native) { + if (!efi_is_native()) { efi_unmap_memmap(); return; } @@ -879,10 +891,14 @@ void __init efi_enter_virtual_mode(void) end_pfn = PFN_UP(end); if (end_pfn <= max_low_pfn_mapped || (end_pfn > (1UL << (32 - PAGE_SHIFT)) - && end_pfn <= max_pfn_mapped)) + && end_pfn <= max_pfn_mapped)) { va = __va(md->phys_addr); - else - va = efi_ioremap(md->phys_addr, size, md->type); + + if (!(md->attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)va, size); + } else + va = efi_ioremap(md->phys_addr, size, + md->type, md->attribute); md->virt_addr = (u64) (unsigned long) va; @@ -892,13 +908,6 @@ void __init efi_enter_virtual_mode(void) continue; } - if (!(md->attribute & EFI_MEMORY_WB)) { - addr = md->virt_addr; - npages = md->num_pages; - memrange_efi_to_native(&addr, 
&npages); - set_memory_uc(addr, npages); - } - systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac3aa54e265..95fd505dfeb 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -82,7 +82,7 @@ void __init efi_call_phys_epilog(void) } void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type) + u32 type, u64 attribute) { unsigned long last_map_pfn; @@ -92,8 +92,11 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { unsigned long top = last_map_pfn << PAGE_SHIFT; - efi_ioremap(top, size - (top - phys_addr), type); + efi_ioremap(top, size - (top - phys_addr), type, attribute); } + if (!(attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + return (void __iomem *)__va(phys_addr); } diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S index e56479e5805..9e7e14797a7 100644 --- a/arch/x86/realmode/rm/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup_asm.S @@ -74,18 +74,9 @@ ENTRY(wakeup_start) lidtl wakeup_idt - /* Clear the EFLAGS but remember if we have EFLAGS.ID */ - movl $X86_EFLAGS_ID, %ecx - pushl %ecx - popfl - pushfl - popl %edi + /* Clear the EFLAGS */ pushl $0 popfl - pushfl - popl %edx - xorl %edx, %edi - andl %ecx, %edi /* %edi is zero iff CPUID & %cr4 are missing */ /* Check header signature... 
*/ movl signature, %eax @@ -120,12 +111,12 @@ ENTRY(wakeup_start) movl %eax, %cr3 btl $WAKEUP_BEHAVIOR_RESTORE_CR4, %edi - jz 1f + jnc 1f movl pmode_cr4, %eax movl %eax, %cr4 1: btl $WAKEUP_BEHAVIOR_RESTORE_EFER, %edi - jz 1f + jnc 1f movl pmode_efer, %eax movl pmode_efer + 4, %edx movl $MSR_EFER, %ecx diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 7a35a6e71d4..a47103fbc69 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -17,7 +17,7 @@ 8 i386 creat sys_creat 9 i386 link sys_link 10 i386 unlink sys_unlink -11 i386 execve ptregs_execve stub32_execve +11 i386 execve sys_execve stub32_execve 12 i386 chdir sys_chdir 13 i386 time sys_time compat_sys_time 14 i386 mknod sys_mknod diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index aeaff8bef2f..07611759ce3 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -13,6 +13,8 @@ endmenu config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT + select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE config 64BIT bool "64-bit kernel" if SUBARCH = "x86" @@ -22,9 +24,11 @@ config X86_32 def_bool !64BIT select HAVE_AOUT select ARCH_WANT_IPC_PARSE_VERSION + select MODULES_USE_ELF_REL config X86_64 def_bool 64BIT + select MODULES_USE_ELF_RELA config RWSEM_XCHGADD_ALGORITHM def_bool X86_XADD && 64BIT diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h index b6efe2381b5..4b181b74454 100644 --- a/arch/x86/um/asm/checksum.h +++ b/arch/x86/um/asm/checksum.h @@ -1,6 +1,150 @@ #ifndef __UM_CHECKSUM_H #define __UM_CHECKSUM_H +#include <linux/string.h> +#include <linux/in6.h> + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + 
*/ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(dst, len, sum); +} + +/** + * csum_fold - Fold and invert a 32bit checksum. + * sum: 32bit unfolded sum + * + * Fold a 32bit running checksum to 16bit and invert it. This is usually + * the last step before putting a checksum into a packet. + * Make sure not to mix with 64bit checksums. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + " addl %1,%0\n" + " adcl $0xffff,%0" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000) + ); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/** + * csum_tcpudp_nofold - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the pseudo header checksum of the input data. Result is + * 32bit unfolded. 
+ */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + asm(" addl %1, %0\n" + " adcl %2, %0\n" + " adcl %3, %0\n" + " adcl $0, %0\n" + : "=r" (sum) + : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/** + * ip_fast_csum - Compute the IPv4 header checksum efficiently. + * iph: ipv4 header + * ihl: length of header / 4 + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm( " movl (%1), %0\n" + " subl $4, %2\n" + " jbe 2f\n" + " addl 4(%1), %0\n" + " adcl 8(%1), %0\n" + " adcl 12(%1), %0\n" + "1: adcl 16(%1), %0\n" + " lea 4(%1), %1\n" + " decl %2\n" + " jne 1b\n" + " adcl $0, %0\n" + " movl %0, %2\n" + " shrl $16, %0\n" + " addw %w2, %w0\n" + " adcl $0, %0\n" + " notl %0\n" + "2:" + /* Since the input registers which are loaded with iph and ipl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. 
*/ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + #ifdef CONFIG_X86_32 # include "checksum_32.h" #else diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h index caab74252e2..ab77b6f9a4b 100644 --- a/arch/x86/um/asm/checksum_32.h +++ b/arch/x86/um/asm/checksum_32.h @@ -5,145 +5,6 @@ #ifndef __UM_SYSDEP_CHECKSUM_H #define __UM_SYSDEP_CHECKSUM_H -#include "linux/in6.h" -#include "linux/string.h" - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -__wsum csum_partial(const void *buff, int len, __wsum sum); - -/* - * Note: when you get a NULL pointer exception here this means someone - * passed in an incorrect kernel address to one of these functions. - * - * If you use these functions directly please don't forget the - * access_ok(). - */ - -static __inline__ -__wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum) -{ - memcpy(dst, src, len); - return csum_partial(dst, len, sum); -} - -/* - * the same as csum_partial, but copies from src while it - * checksums, and handles user-space pointer exceptions correctly, when needed. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -static __inline__ -__wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) -{ - if (copy_from_user(dst, src, len)) { - *err_ptr = -EFAULT; - return (__force __wsum)-1; - } - - return csum_partial(dst, len, sum); -} - -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. 
- * - * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by - * Arnt Gulbrandsen. - */ -static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) -{ - unsigned int sum; - - __asm__ __volatile__( - "movl (%1), %0 ;\n" - "subl $4, %2 ;\n" - "jbe 2f ;\n" - "addl 4(%1), %0 ;\n" - "adcl 8(%1), %0 ;\n" - "adcl 12(%1), %0 ;\n" -"1: adcl 16(%1), %0 ;\n" - "lea 4(%1), %1 ;\n" - "decl %2 ;\n" - "jne 1b ;\n" - "adcl $0, %0 ;\n" - "movl %0, %2 ;\n" - "shrl $16, %0 ;\n" - "addw %w2, %w0 ;\n" - "adcl $0, %0 ;\n" - "notl %0 ;\n" -"2: ;\n" - /* Since the input registers which are loaded with iph and ipl - are modified, we must also specify them as outputs, or gcc - will assume they contain their original values. */ - : "=r" (sum), "=r" (iph), "=r" (ihl) - : "1" (iph), "2" (ihl) - : "memory"); - return (__force __sum16)sum; -} - -/* - * Fold a partial checksum - */ - -static inline __sum16 csum_fold(__wsum sum) -{ - __asm__( - "addl %1, %0 ;\n" - "adcl $0xffff, %0 ;\n" - : "=r" (sum) - : "r" ((__force u32)sum << 16), - "0" ((__force u32)sum & 0xffff0000) - ); - return (__force __sum16)(~(__force u32)sum >> 16); -} - -static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) -{ - __asm__( - "addl %1, %0 ;\n" - "adcl %2, %0 ;\n" - "adcl %3, %0 ;\n" - "adcl $0, %0 ;\n" - : "=r" (sum) - : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum)); - return sum; -} - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -} - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ - static inline __sum16 ip_compute_csum(const void *buff, int len) { return csum_fold (csum_partial(buff, len, 0)); @@ -198,4 
+59,3 @@ static __inline__ __wsum csum_and_copy_to_user(const void *src, } #endif - diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h index a5be9031ea8..7b6cd192157 100644 --- a/arch/x86/um/asm/checksum_64.h +++ b/arch/x86/um/asm/checksum_64.h @@ -5,131 +5,6 @@ #ifndef __UM_SYSDEP_CHECKSUM_H #define __UM_SYSDEP_CHECKSUM_H -#include "linux/string.h" -#include "linux/in6.h" -#include "asm/uaccess.h" - -extern __wsum csum_partial(const void *buff, int len, __wsum sum); - -/* - * Note: when you get a NULL pointer exception here this means someone - * passed in an incorrect kernel address to one of these functions. - * - * If you use these functions directly please don't forget the - * access_ok(). - */ - -static __inline__ -__wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum) -{ - memcpy(dst, src, len); - return(csum_partial(dst, len, sum)); -} - -static __inline__ -__wsum csum_partial_copy_from_user(const void __user *src, - void *dst, int len, __wsum sum, - int *err_ptr) -{ - if (copy_from_user(dst, src, len)) { - *err_ptr = -EFAULT; - return (__force __wsum)-1; - } - return csum_partial(dst, len, sum); -} - -/** - * csum_fold - Fold and invert a 32bit checksum. - * sum: 32bit unfolded sum - * - * Fold a 32bit running checksum to 16bit and invert it. This is usually - * the last step before putting a checksum into a packet. - * Make sure not to mix with 64bit checksums. - */ -static inline __sum16 csum_fold(__wsum sum) -{ - __asm__( - " addl %1,%0\n" - " adcl $0xffff,%0" - : "=r" (sum) - : "r" ((__force u32)sum << 16), - "0" ((__force u32)sum & 0xffff0000) - ); - return (__force __sum16)(~(__force u32)sum >> 16); -} - -/** - * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. 
- * @saddr: source address - * @daddr: destination address - * @len: length of packet - * @proto: ip protocol of packet - * @sum: initial sum to be added in (32bit unfolded) - * - * Returns the pseudo header checksum the input data. Result is - * 32bit unfolded. - */ -static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, - unsigned short proto, __wsum sum) -{ - asm(" addl %1, %0\n" - " adcl %2, %0\n" - " adcl %3, %0\n" - " adcl $0, %0\n" - : "=r" (sum) - : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum)); - return sum; -} - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -} - -/** - * ip_fast_csum - Compute the IPv4 header checksum efficiently. - * iph: ipv4 header - * ihl: length of header / 4 - */ -static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) -{ - unsigned int sum; - - asm( " movl (%1), %0\n" - " subl $4, %2\n" - " jbe 2f\n" - " addl 4(%1), %0\n" - " adcl 8(%1), %0\n" - " adcl 12(%1), %0\n" - "1: adcl 16(%1), %0\n" - " lea 4(%1), %1\n" - " decl %2\n" - " jne 1b\n" - " adcl $0, %0\n" - " movl %0, %2\n" - " shrl $16, %0\n" - " addw %w2, %w0\n" - " adcl $0, %0\n" - " notl %0\n" - "2:" - /* Since the input registers which are loaded with iph and ipl - are modified, we must also specify them as outputs, or gcc - will assume they contain their original values. 
*/ - : "=r" (sum), "=r" (iph), "=r" (ihl) - : "1" (iph), "2" (ihl) - : "memory"); - return (__force __sum16)sum; -} - static inline unsigned add32_with_carry(unsigned a, unsigned b) { asm("addl %2,%0\n\t" diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 0e07adc8cbe..0feee2fd507 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -6,7 +6,7 @@ #define __UM_ELF_X86_H #include <asm/user.h> -#include "skas.h" +#include <skas.h> #ifdef CONFIG_X86_32 diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index e72cd0df5ba..755133258c4 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -1,11 +1,13 @@ #ifndef __UM_X86_PTRACE_H #define __UM_X86_PTRACE_H -#ifdef CONFIG_X86_32 -# include "ptrace_32.h" -#else -# include "ptrace_64.h" +#include <linux/compiler.h> +#ifndef CONFIG_X86_32 +#define __FRAME_OFFSETS /* Needed to get the R* macros */ #endif +#include <asm/ptrace-generic.h> + +#define user_mode(r) UPT_IS_USER(&(r)->regs) #define PT_REGS_AX(r) UPT_AX(&(r)->regs) #define PT_REGS_BX(r) UPT_BX(&(r)->regs) @@ -36,4 +38,52 @@ static inline long regs_return_value(struct pt_regs *regs) { return PT_REGS_AX(regs); } + +/* + * Forward declaration to avoid including sysdep/tls.h, which causes a + * circular include, and compilation failures. 
+ */ +struct user_desc; + +#ifdef CONFIG_X86_32 + +#define HOST_AUDIT_ARCH AUDIT_ARCH_I386 + +extern int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +extern int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +#else + +#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 + +#define PT_REGS_R8(r) UPT_R8(&(r)->regs) +#define PT_REGS_R9(r) UPT_R9(&(r)->regs) +#define PT_REGS_R10(r) UPT_R10(&(r)->regs) +#define PT_REGS_R11(r) UPT_R11(&(r)->regs) +#define PT_REGS_R12(r) UPT_R12(&(r)->regs) +#define PT_REGS_R13(r) UPT_R13(&(r)->regs) +#define PT_REGS_R14(r) UPT_R14(&(r)->regs) +#define PT_REGS_R15(r) UPT_R15(&(r)->regs) + +#include <asm/errno.h> + +static inline int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +static inline int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +extern long arch_prctl(struct task_struct *task, int code, + unsigned long __user *addr); + +#endif #endif /* __UM_X86_PTRACE_H */ diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h deleted file mode 100644 index 2cf225351b6..00000000000 --- a/arch/x86/um/asm/ptrace_32.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#ifndef __UM_PTRACE_I386_H -#define __UM_PTRACE_I386_H - -#define HOST_AUDIT_ARCH AUDIT_ARCH_I386 - -#include "linux/compiler.h" -#include "asm/ptrace-generic.h" - -#define user_mode(r) UPT_IS_USER(&(r)->regs) - -/* - * Forward declaration to avoid including sysdep/tls.h, which causes a - * circular include, and compilation failures. 
- */ -struct user_desc; - -extern int ptrace_get_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc); - -extern int ptrace_set_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc); - -#endif diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h deleted file mode 100644 index ea7bff39432..00000000000 --- a/arch/x86/um/asm/ptrace_64.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2003 PathScale, Inc. - * - * Licensed under the GPL - */ - -#ifndef __UM_PTRACE_X86_64_H -#define __UM_PTRACE_X86_64_H - -#include "linux/compiler.h" -#include "asm/errno.h" - -#define __FRAME_OFFSETS /* Needed to get the R* macros */ -#include "asm/ptrace-generic.h" - -#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 - -#define PT_REGS_R8(r) UPT_R8(&(r)->regs) -#define PT_REGS_R9(r) UPT_R9(&(r)->regs) -#define PT_REGS_R10(r) UPT_R10(&(r)->regs) -#define PT_REGS_R11(r) UPT_R11(&(r)->regs) -#define PT_REGS_R12(r) UPT_R12(&(r)->regs) -#define PT_REGS_R13(r) UPT_R13(&(r)->regs) -#define PT_REGS_R14(r) UPT_R14(&(r)->regs) -#define PT_REGS_R15(r) UPT_R15(&(r)->regs) - -/* XXX */ -#define user_mode(r) UPT_IS_USER(&(r)->regs) - -struct user_desc; - -static inline int ptrace_get_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc) -{ - return -ENOSYS; -} - -static inline int ptrace_set_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc) -{ - return -ENOSYS; -} - -extern long arch_prctl(struct task_struct *task, int code, - unsigned long __user *addr); -#endif diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c index 17d88cf2c6c..33daff4dade 100644 --- a/arch/x86/um/bugs_32.c +++ b/arch/x86/um/bugs_32.c @@ -4,9 +4,9 @@ */ #include <signal.h> -#include "kern_util.h" -#include "longjmp.h" -#include "sysdep/ptrace.h" +#include <kern_util.h> +#include <longjmp.h> +#include <sysdep/ptrace.h> #include <generated/asm-offsets.h> /* Set during early boot */ 
diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c index 44e02ba2a26..8cc8256c698 100644 --- a/arch/x86/um/bugs_64.c +++ b/arch/x86/um/bugs_64.c @@ -4,7 +4,7 @@ * Licensed under the GPL */ -#include "sysdep/ptrace.h" +#include <sysdep/ptrace.h> void arch_check_bugs(void) { diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c index d670f68532f..8784ab30d91 100644 --- a/arch/x86/um/fault.c +++ b/arch/x86/um/fault.c @@ -3,7 +3,7 @@ * Licensed under the GPL */ -#include "sysdep/ptrace.h" +#include <sysdep/ptrace.h> /* These two are from asm-um/uaccess.h and linux/module.h, check them. */ struct exception_table_entry diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 26b0e39d2ce..8e08176f0bc 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -7,11 +7,11 @@ #include <linux/sched.h> #include <linux/slab.h> #include <asm/unistd.h> -#include "os.h" -#include "proc_mm.h" -#include "skas.h" -#include "skas_ptrace.h" -#include "sysdep/tls.h" +#include <os.h> +#include <proc_mm.h> +#include <skas.h> +#include <skas_ptrace.h> +#include <sysdep/tls.h> extern int modify_ldt(int func, void *ptr, unsigned long bytecount); diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c index 546518727a7..c6492e75797 100644 --- a/arch/x86/um/mem_64.c +++ b/arch/x86/um/mem_64.c @@ -1,6 +1,6 @@ -#include "linux/mm.h" -#include "asm/page.h" -#include "asm/mman.h" +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/mman.h> const char *arch_vma_name(struct vm_area_struct *vma) { diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index 0cdbb86b012..41bfe84e11a 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -9,8 +9,8 @@ #ifdef __i386__ #include <sys/user.h> #endif -#include "longjmp.h" -#include "sysdep/ptrace_user.h" +#include <longjmp.h> +#include <sysdep/ptrace_user.h> int save_fp_registers(int pid, unsigned long *fp_regs) { diff --git a/arch/x86/um/os-Linux/task_size.c 
b/arch/x86/um/os-Linux/task_size.c index efb16c5c9bc..8502ad30e61 100644 --- a/arch/x86/um/os-Linux/task_size.c +++ b/arch/x86/um/os-Linux/task_size.c @@ -2,7 +2,7 @@ #include <stdlib.h> #include <signal.h> #include <sys/mman.h> -#include "longjmp.h" +#include <longjmp.h> #ifdef __i386__ diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c index 82276b6071a..9d94b3b76c7 100644 --- a/arch/x86/um/os-Linux/tls.c +++ b/arch/x86/um/os-Linux/tls.c @@ -5,7 +5,7 @@ #include <sys/syscall.h> #include <unistd.h> -#include "sysdep/tls.h" +#include <sysdep/tls.h> #ifndef PTRACE_GET_THREAD_AREA #define PTRACE_GET_THREAD_AREA 25 diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 3b949daa095..ce3dd4f36f3 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -3,10 +3,10 @@ * Licensed under the GPL */ -#include "linux/mm.h" -#include "linux/sched.h" -#include "asm/uaccess.h" -#include "skas.h" +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <skas.h> extern int arch_switch_tls(struct task_struct *to); diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c index 3960ca1dd35..617885b1899 100644 --- a/arch/x86/um/ptrace_user.c +++ b/arch/x86/um/ptrace_user.c @@ -4,7 +4,7 @@ */ #include <errno.h> -#include "ptrace_user.h" +#include <ptrace_user.h> int ptrace_getregs(long pid, unsigned long *regs_out) { diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 6ce2d76eb90..eb9356904ad 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -2,7 +2,7 @@ #define __SYSDEP_X86_PTRACE_H #include <generated/user_constants.h> -#include "sysdep/faultinfo.h" +#include <sysdep/faultinfo.h> #define MAX_REG_OFFSET (UM_FRAME_SIZE) #define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h index bd161e30010..3f55e5bd3ce 100644 --- 
a/arch/x86/um/shared/sysdep/stub.h +++ b/arch/x86/um/shared/sysdep/stub.h @@ -1,8 +1,8 @@ #include <asm/unistd.h> #include <sys/mman.h> #include <signal.h> -#include "as-layout.h" -#include "stub-data.h" +#include <as-layout.h> +#include <stub-data.h> #ifdef __i386__ #include "stub_32.h" diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h index 05cb796aecb..8436079be91 100644 --- a/arch/x86/um/shared/sysdep/syscalls_32.h +++ b/arch/x86/um/shared/sysdep/syscalls_32.h @@ -3,8 +3,8 @@ * Licensed under the GPL */ -#include "asm/unistd.h" -#include "sysdep/ptrace.h" +#include <asm/unistd.h> +#include <sysdep/ptrace.h> typedef long syscall_handler_t(struct pt_regs); diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index ba7363ecf89..bdaa08cfbcf 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -11,8 +11,8 @@ #include <asm/unistd.h> #include <asm/uaccess.h> #include <asm/ucontext.h> -#include "frame_kern.h" -#include "skas.h" +#include <frame_kern.h> +#include <skas.h> #ifdef CONFIG_X86_32 diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S index 54a36ec20cb..b972649d3a1 100644 --- a/arch/x86/um/stub_32.S +++ b/arch/x86/um/stub_32.S @@ -1,4 +1,4 @@ -#include "as-layout.h" +#include <as-layout.h> .globl syscall_stub .section .__syscall_stub, "ax" diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S index 20e4a96a6dc..7160b20172d 100644 --- a/arch/x86/um/stub_64.S +++ b/arch/x86/um/stub_64.S @@ -1,4 +1,4 @@ -#include "as-layout.h" +#include <as-layout.h> .globl syscall_stub .section .__syscall_stub, "ax" diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index b7450bd22e7..1518d2805ae 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -3,9 +3,9 @@ * Licensed under the GPL */ -#include "sysdep/stub.h" -#include "sysdep/faultinfo.h" -#include "sysdep/mcontext.h" +#include <sysdep/stub.h> +#include <sysdep/faultinfo.h> +#include <sysdep/mcontext.h> void __attribute__ 
((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index b5408cecac6..232e60504b3 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -25,7 +25,6 @@ #define old_mmap sys_old_mmap #define ptregs_fork sys_fork -#define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old #define ptregs_clone i386_clone diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c index 2d5cc51e9be..c9bee5b8c0d 100644 --- a/arch/x86/um/sysrq_32.c +++ b/arch/x86/um/sysrq_32.c @@ -3,12 +3,12 @@ * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/smp.h" -#include "linux/sched.h" -#include "linux/kallsyms.h" -#include "asm/ptrace.h" -#include "sysrq.h" +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/kallsyms.h> +#include <asm/ptrace.h> +#include <asm/sysrq.h> /* This is declared by <linux/sched.h> */ void show_regs(struct pt_regs *regs) diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index 08258f17996..a0e7fb1134a 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -10,7 +10,7 @@ #include <linux/utsname.h> #include <asm/current.h> #include <asm/ptrace.h> -#include "sysrq.h" +#include <asm/sysrq.h> void __show_regs(struct pt_regs *regs) { diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index baba84f8ecb..5f5feff3d24 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -3,12 +3,12 @@ * Licensed under the GPL */ -#include "linux/percpu.h" -#include "linux/sched.h" -#include "asm/uaccess.h" -#include "os.h" -#include "skas.h" -#include "sysdep/tls.h" +#include <linux/percpu.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <os.h> +#include <skas.h> +#include <sysdep/tls.h> /* * If needed we can detect when it's uninitialized. 
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c index f7ba46200ec..d22363cb854 100644 --- a/arch/x86/um/tls_64.c +++ b/arch/x86/um/tls_64.c @@ -1,4 +1,4 @@ -#include "linux/sched.h" +#include <linux/sched.h> void clear_flushed_tls(struct task_struct *task) { diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 885eff49d6a..4df6c373421 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -80,7 +80,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) } -notrace static inline long vgetns(void) +notrace static inline u64 vgetsns(void) { long v; cycles_t cycles; @@ -91,21 +91,24 @@ notrace static inline long vgetns(void) else return 0; v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; - return (v * gtod->clock.mult) >> gtod->clock.shift; + return v * gtod->clock.mult; } /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ notrace static int __always_inline do_realtime(struct timespec *ts) { - unsigned long seq, ns; + unsigned long seq; + u64 ns; int mode; + ts->tv_nsec = 0; do { seq = read_seqcount_begin(&gtod->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; - ts->tv_nsec = gtod->wall_time_nsec; - ns = vgetns(); + ns = gtod->wall_time_snsec; + ns += vgetsns(); + ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); timespec_add_ns(ts, ns); @@ -114,15 +117,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts) notrace static int do_monotonic(struct timespec *ts) { - unsigned long seq, ns; + unsigned long seq; + u64 ns; int mode; + ts->tv_nsec = 0; do { seq = read_seqcount_begin(&gtod->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; - ts->tv_nsec = gtod->monotonic_time_nsec; - ns = vgetns(); + ns = gtod->monotonic_time_snsec; + ns += vgetsns(); + ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); timespec_add_ns(ts, ns);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2d932c351f9..586d83812b6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,7 @@ #include <linux/memblock.h> #include <xen/xen.h> +#include <xen/events.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> #include <xen/interface/physdev.h> @@ -80,8 +81,6 @@ #include "smp.h" #include "multicalls.h" -#include <xen/events.h> - EXPORT_SYMBOL_GPL(hypercall_page); DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); @@ -986,7 +985,16 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } - +#ifdef CONFIG_X86_64 +static inline unsigned long xen_read_cr8(void) +{ + return 0; +} +static inline void xen_write_cr8(unsigned long val) +{ + BUG_ON(val); +} +#endif static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -1155,6 +1163,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, +#ifdef CONFIG_X86_64 + .read_cr8 = xen_read_cr8, + .write_cr8 = xen_write_cr8, +#endif + .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, @@ -1163,6 +1176,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, + .read_tscp = native_read_tscp, + .iret = xen_iret, .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 15733765797..01a4dc015ae 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -5,6 +5,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> #include <xen/interface/vcpu.h> +#include <xen/events.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5a16824cc2b..6226c99729b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -47,6 +47,7 @@ #include <linux/gfp.h> #include <linux/memblock.h> #include 
<linux/seq_file.h> +#include <linux/crash_dump.h> #include <trace/events/xen.h> @@ -2381,6 +2382,43 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); #ifdef CONFIG_XEN_PVHVM +#ifdef CONFIG_PROC_VMCORE +/* + * This function is used in two contexts: + * - the kdump kernel has to check whether a pfn of the crashed kernel + * was a ballooned page. vmcore is using this function to decide + * whether to access a pfn of the crashed kernel. + * - the kexec kernel has to check whether a pfn was ballooned by the + * previous kernel. If the pfn is ballooned, handle it properly. + * Returns 0 if the pfn is not backed by a RAM page, the caller may + * handle the pfn special in this case. + */ +static int xen_oldmem_pfn_is_ram(unsigned long pfn) +{ + struct xen_hvm_get_mem_type a = { + .domid = DOMID_SELF, + .pfn = pfn, + }; + int ram; + + if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) + return -ENXIO; + + switch (a.mem_type) { + case HVMMEM_mmio_dm: + ram = 0; + break; + case HVMMEM_ram_rw: + case HVMMEM_ram_ro: + default: + ram = 1; + break; + } + + return ram; +} +#endif + static void xen_hvm_exit_mmap(struct mm_struct *mm) { struct xen_hvm_pagetable_dying a; @@ -2411,6 +2449,9 @@ void __init xen_hvm_init_mmu_ops(void) { if (is_pagetable_dying_supported()) pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; +#ifdef CONFIG_PROC_VMCORE + register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram); +#endif } #endif @@ -2451,8 +2492,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == - (VM_PFNMAP | VM_RESERVED | VM_IO))); + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index bb5a8105ea8..a95b41744ad 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -35,7 +35,6 
@@ void xen_set_pat(u64); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); -void __init xen_init_IRQ(void); void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); |