Diffstat (limited to 'arch')
-rw-r--r--  arch/ia64/kernel/head.S              |   2
-rw-r--r--  arch/ia64/kernel/ivt.S               |   2
-rw-r--r--  arch/ia64/kvm/vmm_ivt.S              |   2
-rw-r--r--  arch/s390/include/asm/sigp.h         |  19
-rw-r--r--  arch/s390/include/asm/smp.h          |  13
-rw-r--r--  arch/s390/include/uapi/asm/unistd.h  |   3
-rw-r--r--  arch/s390/kernel/compat_wrapper.c    |   3
-rw-r--r--  arch/s390/kernel/dumpstack.c         |   8
-rw-r--r--  arch/s390/kernel/ptrace.c            |   2
-rw-r--r--  arch/s390/kernel/setup.c             |  32
-rw-r--r--  arch/s390/kernel/smp.c               |  15
-rw-r--r--  arch/s390/kernel/syscalls.S          |   1
-rw-r--r--  arch/s390/lib/uaccess.c              |   5
-rw-r--r--  arch/s390/mm/fault.c                 | 140
-rw-r--r--  arch/x86/include/asm/kvm_host.h      |   2
-rw-r--r--  arch/x86/kvm/cpuid.c                 |   2
-rw-r--r--  arch/x86/kvm/cpuid.h                 |   8
-rw-r--r--  arch/x86/kvm/mmu.c                   |  38
-rw-r--r--  arch/x86/kvm/mmu.h                   |  44
-rw-r--r--  arch/x86/kvm/paging_tmpl.h           |   2
-rw-r--r--  arch/x86/kvm/vmx.c                   |  11
-rw-r--r--  arch/x86/kvm/x86.c                   |  10
22 files changed, 307 insertions, 57 deletions
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index e6f80fcf013..a4acddad0c7 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -259,7 +259,7 @@ start_ap:
 	 * Switch into virtual mode:
 	 */
 	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
-		  |IA64_PSR_DI|IA64_PSR_AC)
+		  |IA64_PSR_DI)
 	;;
 	mov cr.ipsr=r16
 	movl r17=1f
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 689ffcaa284..18e794a5724 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -58,7 +58,7 @@
 #include <asm/unistd.h>
 #include <asm/errno.h>
 
-#if 1
+#if 0
 # define PSR_DEFAULT_BITS	psr.ac
 #else
 # define PSR_DEFAULT_BITS	0
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 24018484c6e..397e34a63e1 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -64,7 +64,7 @@
 #include "kvm_minstate.h"
 #include "vti.h"
 
-#if 1
+#if 0
 # define PSR_DEFAULT_BITS	psr.ac
 #else
 # define PSR_DEFAULT_BITS	0
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index d091aa1aaf1..bf9c823d402 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -31,4 +31,23 @@
 #define SIGP_STATUS_INCORRECT_STATE	0x00000200UL
 #define SIGP_STATUS_NOT_RUNNING		0x00000400UL
 
+#ifndef __ASSEMBLY__
+
+static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+{
+	register unsigned int reg1 asm ("1") = parm;
+	int cc;
+
+	asm volatile(
+		" sigp %1,%2,0(%3)\n"
+		" ipm %0\n"
+		" srl %0,28\n"
+		: "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+	if (status && cc == 1)
+		*status = reg1;
+	return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 16077939409..21703f85b48 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,6 +7,8 @@
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
+#include <asm/sigp.h>
+
 #ifdef CONFIG_SMP
 
 #include <asm/lowcore.h>
@@ -50,9 +52,18 @@ static inline int smp_store_status(int cpu) { return 0; }
 static inline int smp_vcpu_scheduled(int cpu) { return 1; }
 static inline void smp_yield_cpu(int cpu) { }
 static inline void smp_yield(void) { }
-static inline void smp_stop_cpu(void) { }
 static inline void smp_fill_possible_mask(void) { }
 
+static inline void smp_stop_cpu(void)
+{
+	u16 pcpu = stap();
+
+	for (;;) {
+		__pcpu_sigp(pcpu, SIGP_STOP, 0, NULL);
+		cpu_relax();
+	}
+}
+
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 5eb5c9ddb12..3802d2d3a18 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -282,7 +282,8 @@
 #define __NR_finit_module	344
 #define __NR_sched_setattr	345
 #define __NR_sched_getattr	346
-#define NR_syscalls 345
+#define __NR_renameat2		347
+#define NR_syscalls 348
 
 /*
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 824c39dfddf..45cdb37aa6f 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -1,5 +1,5 @@
 /*
- *  Compat sytem call wrappers.
+ *  Compat system call wrappers.
  *
  *  Copyright IBM Corp. 2014
  */
@@ -213,3 +213,4 @@ COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, i
 COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
 COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
 COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index e6af9406987..acb412442e5 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -144,10 +144,10 @@ void show_registers(struct pt_regs *regs)
 	char *mode;
 
 	mode = user_mode(regs) ? "User" : "Krnl";
-	printk("%s PSW : %p %p (%pSR)\n",
-	       mode, (void *) regs->psw.mask,
-	       (void *) regs->psw.addr,
-	       (void *) regs->psw.addr);
+	printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+	if (!user_mode(regs))
+		printk(" (%pSR)", (void *)regs->psw.addr);
+	printk("\n");
 	printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
 	       "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
 	       mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 4ac8fafec95..1c82619eb4f 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -64,7 +64,7 @@ void update_cr_regs(struct task_struct *task)
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
 			cr_new &= ~(1UL << 55);
 		if (cr_new != cr)
-			__ctl_load(cr, 0, 0);
+			__ctl_load(cr_new, 0, 0);
 		/* Set or clear transaction execution TDC bits 62 and 63. */
 		__ctl_store(cr, 2, 2);
 		cr_new = cr & ~3UL;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f70f2489fa5..88d1ca81e2d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1027,3 +1027,35 @@ void __init setup_arch(char **cmdline_p)
 	/* Setup zfcpdump support */
 	setup_zfcpdump();
 }
+
+#ifdef CONFIG_32BIT
+static int no_removal_warning __initdata;
+
+static int __init parse_no_removal_warning(char *str)
+{
+	no_removal_warning = 1;
+	return 0;
+}
+__setup("no_removal_warning", parse_no_removal_warning);
+
+static int __init removal_warning(void)
+{
+	if (no_removal_warning)
+		return 0;
+	printk(KERN_ALERT "\n\n");
+	printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n");
+	printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n");
+	printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n");
+	printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n");
+	printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n");
+	printk(KERN_CONT "please let us know. Please write to:\n");
+	printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n");
+	printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n");
+	printk(KERN_CONT "Thank you!\n\n");
+	printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n");
+	printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n");
+	schedule_timeout_uninterruptible(300 * HZ);
+	return 0;
+}
+early_initcall(removal_warning);
+#endif
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 512ce1cde2a..86e65ec3422 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -82,21 +82,6 @@ DEFINE_MUTEX(smp_cpu_state_mutex);
 /*
  * Signal processor helper functions.
  */
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
-{
-	register unsigned int reg1 asm ("1") = parm;
-	int cc;
-
-	asm volatile(
-		" sigp %1,%2,0(%3)\n"
-		" ipm %0\n"
-		" srl %0,28\n"
-		: "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
-	if (status && cc == 1)
-		*status = reg1;
-	return cc;
-}
-
 static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
 {
 	int cc;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 542ef488bac..fe5cdf29a00 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -355,3 +355,4 @@ SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp)
 SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
 SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr)	/* 345 */
 SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 23f866b4c7f..7416efe8eae 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -338,9 +338,6 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
 	register unsigned long reg0 asm("0") = 0;
 	unsigned long tmp1, tmp2;
 
-	if (unlikely(!size))
-		return 0;
-	update_primary_asce(current);
 	asm volatile(
 	" la %2,0(%1)\n"
 	" la %3,0(%0,%1)\n"
@@ -359,6 +356,8 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
 
 unsigned long __strnlen_user(const char __user *src, unsigned long size)
 {
+	if (unlikely(!size))
+		return 0;
 	update_primary_asce(current);
 	return strnlen_user_srst(src, size);
 }
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 19f623f1f21..2f51a998a67 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -126,6 +126,133 @@ static inline int user_space_fault(struct pt_regs *regs)
 	return 0;
 }
 
+static int bad_address(void *p)
+{
+	unsigned long dummy;
+
+	return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+#ifdef CONFIG_64BIT
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+	unsigned long *table = __va(asce & PAGE_MASK);
+
+	pr_alert("AS:%016lx ", asce);
+	switch (asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_REGION1:
+		table = table + ((address >> 53) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R1:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_REGION2:
+		table = table + ((address >> 42) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R2:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_REGION3:
+		table = table + ((address >> 31) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R3:%016lx ", *table);
+		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_SEGMENT:
+		table = table + ((address >> 20) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont(KERN_CONT "S:%016lx ", *table);
+		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+			goto out;
+		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+	}
+	table = table + ((address >> 12) & 0xff);
+	if (bad_address(table))
+		goto bad;
+	pr_cont("P:%016lx ", *table);
+out:
+	pr_cont("\n");
+	return;
+bad:
+	pr_cont("BAD\n");
+}
+
+#else /* CONFIG_64BIT */
+
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+	unsigned long *table = __va(asce & PAGE_MASK);
+
+	pr_alert("AS:%08lx ", asce);
+	table = table + ((address >> 20) & 0x7ff);
+	if (bad_address(table))
+		goto bad;
+	pr_cont("S:%08lx ", *table);
+	if (*table & _SEGMENT_ENTRY_INVALID)
+		goto out;
+	table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+	table = table + ((address >> 12) & 0xff);
+	if (bad_address(table))
+		goto bad;
+	pr_cont("P:%08lx ", *table);
+out:
+	pr_cont("\n");
+	return;
+bad:
+	pr_cont("BAD\n");
+}
+
+#endif /* CONFIG_64BIT */
+
+static void dump_fault_info(struct pt_regs *regs)
+{
+	unsigned long asce;
+
+	pr_alert("Fault in ");
+	switch (regs->int_parm_long & 3) {
+	case 3:
+		pr_cont("home space ");
+		break;
+	case 2:
+		pr_cont("secondary space ");
+		break;
+	case 1:
+		pr_cont("access register ");
+		break;
+	case 0:
+		pr_cont("primary space ");
+		break;
+	}
+	pr_cont("mode while using ");
+	if (!user_space_fault(regs)) {
+		asce = S390_lowcore.kernel_asce;
+		pr_cont("kernel ");
+	}
+#ifdef CONFIG_PGSTE
+	else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+		struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+		asce = gmap->asce;
+		pr_cont("gmap ");
+	}
+#endif
+	else {
+		asce = S390_lowcore.user_asce;
+		pr_cont("user ");
+	}
+	pr_cont("ASCE.\n");
+	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
 static inline void report_user_fault(struct pt_regs *regs, long signr)
 {
 	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
@@ -138,8 +265,9 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
 	       regs->int_code);
 	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
 	printk(KERN_CONT "\n");
-	printk(KERN_ALERT "failing address: %lX\n",
-	       regs->int_parm_long & __FAIL_ADDR_MASK);
+	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+	dump_fault_info(regs);
 	show_regs(regs);
 }
 
@@ -177,11 +305,13 @@ static noinline void do_no_context(struct pt_regs *regs)
 	address = regs->int_parm_long & __FAIL_ADDR_MASK;
 	if (!user_space_fault(regs))
 		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
-		       " at virtual kernel address %p\n", (void *)address);
+		       " in virtual kernel address space\n");
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request"
-		       " at virtual user address %p\n", (void *)address);
-
+		       " in virtual user address space\n");
+	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+	dump_fault_info(regs);
 	die(regs, "Oops");
 	do_exit(SIGKILL);
 }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcaf9c96126..7de069afb38 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bea60671ef8..f47a104a749 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -308,7 +308,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_supported_word9_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX);
+		F(ADX) | F(SMAP);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7ed8c..eeecbed26ac 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
 	return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f5704d9e5dd..813d31038b9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 	}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
 		struct kvm_mmu *mmu, bool ept)
 {
 	unsigned bit, byte, pfec;
 	u8 map;
-	bool fault, x, w, u, wf, uf, ff, smep;
+	bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
 
-	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
 	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
 		pfec = byte << 1;
 		map = 0;
 		wf = pfec & PFERR_WRITE_MASK;
 		uf = pfec & PFERR_USER_MASK;
 		ff = pfec & PFERR_FETCH_MASK;
+		/*
+		 * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+		 * subject to SMAP restrictions, and cleared otherwise. The
+		 * bit is only meaningful if the SMAP bit is set in CR4.
+		 */
+		smapf = !(pfec & PFERR_RSVD_MASK);
 		for (bit = 0; bit < 8; ++bit) {
 			x = bit & ACC_EXEC_MASK;
 			w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 			/* Allow supervisor writes if !cr0.wp */
 			w |= !is_write_protection(vcpu) && !uf;
 			/* Disallow supervisor fetches of user code if cr4.smep */
-			x &= !(smep && u && !uf);
+			x &= !(cr4_smep && u && !uf);
+
+			/*
+			 * SMAP:kernel-mode data accesses from user-mode
+			 * mappings should fault. A fault is considered
+			 * as a SMAP violation if all of the following
+			 * conditions are ture:
+			 *   - X86_CR4_SMAP is set in CR4
+			 *   - An user page is accessed
+			 *   - Page fault in kernel mode
+			 *   - if CPL = 3 or X86_EFLAGS_AC is clear
+			 *
+			 *   Here, we cover the first three conditions.
+			 *   The fourth is computed dynamically in
+			 *   permission_fault() and is in smapf.
+			 *
+			 *   Also, SMAP does not affect instruction
+			 *   fetches, add the !ff check here to make it
+			 *   clearer.
+			 */
+			smap = cr4_smap && u && !uf && !ff;
 		} else
 			/* Not really needed: no U/S accesses on ept  */
 			u = 1;
 
-		fault = (ff && !x) || (uf && !u) || (wf && !w);
+		fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+			(smapf && smap);
 		map |= fault << bit;
 	}
 	mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 29261527435..3842e70bdb7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
 
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 		bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+		bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -110,10 +118,30 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
  * Will a fault with a given page-fault error code (pfec) cause a permission
  * fault with the given access (in ACC_* format)?
  */
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
-				    unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+				    unsigned pte_access, unsigned pfec)
 {
-	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+	int cpl = kvm_x86_ops->get_cpl(vcpu);
+	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+	/*
+	 * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
+	 *
+	 * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+	 * (these are implicit supervisor accesses) regardless of the value
+	 * of EFLAGS.AC.
+	 *
+	 * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+	 * the result in X86_EFLAGS_AC. We then insert it in place of
+	 * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+	 * but it will be one in index if SMAP checks are being overridden.
+	 * It is important to keep this branchless.
+	 */
+	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+	int index = (pfec >> 1) +
+		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+	return (mmu->permissions[index] >> pte_access) & 1;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b1e6c1bf68d..123efd3ec29 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,7 +353,7 @@ retry_walk:
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
-	if (unlikely(permission_fault(mmu, pte_access, access))) {
+	if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1320e0f8e61..1f68c583192 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3484,13 +3484,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 			hw_cr4 &= ~X86_CR4_PAE;
 			hw_cr4 |= X86_CR4_PSE;
 			/*
-			 * SMEP is disabled if CPU is in non-paging mode in
-			 * hardware. However KVM always uses paging mode to
+			 * SMEP/SMAP is disabled if CPU is in non-paging mode
+			 * in hardware. However KVM always uses paging mode to
 			 * emulate guest non-paging mode with TDP.
-			 * To emulate this behavior, SMEP needs to be manually
-			 * disabled when guest switches to non-paging mode.
+			 * To emulate this behavior, SMEP/SMAP needs to be
+			 * manually disabled when guest switches to non-paging
+			 * mode.
 			 */
-			hw_cr4 &= ~X86_CR4_SMEP;
+			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
 		} else if (!(cr4 & X86_CR4_PAE)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 		}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d1b5cd4d34..8b8fc0b792b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
 		return 1;
 
+	if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+		return 1;
+
 	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
 		return 1;
 
@@ -680,6 +683,9 @@
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
+	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
 	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
 		kvm_update_cpuid(vcpu);
 
@@ -1117,7 +1123,6 @@ static inline u64 get_kernel_ns(void)
 {
 	struct timespec ts;
 
-	WARN_ON(preemptible());
 	ktime_get_ts(&ts);
 	monotonic_to_bootbased(&ts);
 	return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 		| (write ? PFERR_WRITE_MASK : 0);
 
 	if (vcpu_match_mmio_gva(vcpu, gva)
-	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+				 vcpu->arch.access, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 			(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
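A note on the permission_fault() change above: the SMAP override condition ((CPL < 3) && EFLAGS.AC) is folded into the permission-bitmap index without a branch, by shifting the surviving AC bit into the PFERR_RSVD position of the index. The following stand-alone sketch is not kernel code; the helper smap_index() and the main() driver are illustrative only, and the constants mirror the x86 definitions assumed by the patch:

#include <stdio.h>

/* Values mirror the x86/KVM definitions this series relies on. */
#define X86_EFLAGS_AC_BIT   18
#define X86_EFLAGS_AC       (1UL << X86_EFLAGS_AC_BIT)
#define PFERR_RSVD_BIT      3

/*
 * Hypothetical helper: compute the permission-bitmap index the way the
 * reworked permission_fault() does.  pfec is the page-fault error code,
 * cpl the guest CPL, rflags the guest RFLAGS.
 */
static int smap_index(unsigned pfec, int cpl, unsigned long rflags)
{
	/*
	 * (cpl - 3) has all upper bits set only when cpl < 3, so ANDing
	 * with RFLAGS.AC keeps the AC bit exactly when the SMAP check may
	 * be overridden (cpl < 3 and AC = 1).
	 */
	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);

	/*
	 * Shift the surviving AC bit down into the PFERR_RSVD position of
	 * the index; the index drops PFEC bit 0, hence the extra "+ 1".
	 */
	return (pfec >> 1) + (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
}

int main(void)
{
	/* CPL 0 with AC set: SMAP checks overridden, RSVD bit of index is 1. */
	printf("%d\n", smap_index(0x4, 0, X86_EFLAGS_AC));	/* prints 6 */
	/* CPL 3, or AC clear: index is just pfec >> 1. */
	printf("%d\n", smap_index(0x4, 3, X86_EFLAGS_AC));	/* prints 2 */
	printf("%d\n", smap_index(0x4, 0, 0));			/* prints 2 */
	return 0;
}

Because update_permission_bitmask() precomputes both variants of each permission byte (SMAP enforced vs. overridden, keyed on the PFERR_RSVD bit), this index calculation keeps the hot lookup path branchless, as the comment in the mmu.h hunk explains.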