diff options
-rw-r--r-- | arch/x86/include/asm/mmu_context.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/mpx.h | 41 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 18 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 85 | ||||
-rw-r--r-- | arch/x86/mm/mpx.c | 223 | ||||
-rw-r--r-- | fs/exec.c | 2 | ||||
-rw-r--r-- | include/asm-generic/mmu_context.h | 5 | ||||
-rw-r--r-- | include/linux/mm_types.h | 4 | ||||
-rw-r--r-- | include/uapi/linux/prctl.h | 6 | ||||
-rw-r--r-- | kernel/sys.c | 12 |
11 files changed, 399 insertions, 6 deletions
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 166af2a8e86..0b0ba91ff1e 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -10,6 +10,7 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> +#include <asm/mpx.h> #ifndef CONFIG_PARAVIRT #include <asm-generic/mm_hooks.h> @@ -102,4 +103,10 @@ do { \ } while (0) #endif +static inline void arch_bprm_mm_init(struct mm_struct *mm, + struct vm_area_struct *vma) +{ + mpx_mm_init(mm); +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 35bcb1cddf4..05eecbf8a48 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -5,6 +5,14 @@ #include <asm/ptrace.h> #include <asm/insn.h> +/* + * NULL is theoretically a valid place to put the bounds + * directory, so point this at an invalid address. + */ +#define MPX_INVALID_BOUNDS_DIR ((void __user *)-1) +#define MPX_BNDCFG_ENABLE_FLAG 0x1 +#define MPX_BD_ENTRY_VALID_FLAG 0x1 + #ifdef CONFIG_X86_64 /* upper 28 bits [47:20] of the virtual address in 64-bit used to @@ -18,6 +26,7 @@ #define MPX_BT_ENTRY_OFFSET 17 #define MPX_BT_ENTRY_SHIFT 5 #define MPX_IGN_BITS 3 +#define MPX_BD_ENTRY_TAIL 3 #else @@ -26,23 +35,55 @@ #define MPX_BT_ENTRY_OFFSET 10 #define MPX_BT_ENTRY_SHIFT 4 #define MPX_IGN_BITS 2 +#define MPX_BD_ENTRY_TAIL 2 #endif #define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) #define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) +#define MPX_BNDSTA_TAIL 2 +#define MPX_BNDCFG_TAIL 12 +#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) +#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) +#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1)) + +#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) #define MPX_BNDSTA_ERROR_CODE 0x3 #ifdef CONFIG_X86_INTEL_MPX siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, struct xsave_struct *xsave_buf); +int mpx_handle_bd_fault(struct xsave_struct *xsave_buf); +static inline int kernel_managing_mpx_tables(struct mm_struct *mm) +{ + return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); +} +static inline void mpx_mm_init(struct mm_struct *mm) +{ + /* + * NULL is theoretically a valid place to put the bounds + * directory, so point this at an invalid address. + */ + mm->bd_addr = MPX_INVALID_BOUNDS_DIR; +} #else static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, struct xsave_struct *xsave_buf) { return NULL; } +static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +{ + return -EINVAL; +} +static inline int kernel_managing_mpx_tables(struct mm_struct *mm) +{ + return 0; +} +static inline void mpx_mm_init(struct mm_struct *mm) +{ +} #endif /* CONFIG_X86_INTEL_MPX */ #endif /* _ASM_X86_MPX_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6571aaabacb..9617a171681 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -954,6 +954,24 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); +/* Register/unregister a process' MPX related resource */ +#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) +#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) + +#ifdef CONFIG_X86_INTEL_MPX +extern int mpx_enable_management(struct task_struct *tsk); +extern int mpx_disable_management(struct task_struct *tsk); +#else +static inline int mpx_enable_management(struct task_struct *tsk) +{ + return -EINVAL; +} +static inline int mpx_disable_management(struct task_struct *tsk) +{ + return -EINVAL; +} +#endif /* CONFIG_X86_INTEL_MPX */ + extern u16 amd_get_nb_id(int cpu); static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ab08aa2276f..214245d6b99 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -960,6 +960,8 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = _brk_end; + mpx_mm_init(&init_mm); + code_resource.start = __pa_symbol(_text); code_resource.end = __pa_symbol(_etext)-1; data_resource.start = __pa_symbol(_etext); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0d0e922fafc..651d5d4f755 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -60,6 +60,7 @@ #include <asm/fixmap.h> #include <asm/mach_traps.h> #include <asm/alternative.h> +#include <asm/mpx.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -228,7 +229,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) -DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) @@ -278,6 +278,89 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif +dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) +{ + struct task_struct *tsk = current; + struct xsave_struct *xsave_buf; + enum ctx_state prev_state; + struct bndcsr *bndcsr; + siginfo_t *info; + + prev_state = exception_enter(); + if (notify_die(DIE_TRAP, "bounds", regs, error_code, + X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP) + goto exit; + conditional_sti(regs); + + if (!user_mode(regs)) + die("bounds", regs, error_code); + + if (!cpu_feature_enabled(X86_FEATURE_MPX)) { + /* The exception is not from Intel MPX */ + goto exit_trap; + } + + /* + * We need to look at BNDSTATUS to resolve this exception. + * It is not directly accessible, though, so we need to + * do an xsave and then pull it out of the xsave buffer. + */ + fpu_save_init(&tsk->thread.fpu); + xsave_buf = &(tsk->thread.fpu.state->xsave); + bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + if (!bndcsr) + goto exit_trap; + + /* + * The error code field of the BNDSTATUS register communicates status + * information of a bound range exception #BR or operation involving + * bound directory. + */ + switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { + case 2: /* Bound directory has invalid entry. */ + if (mpx_handle_bd_fault(xsave_buf)) + goto exit_trap; + break; /* Success, it was handled */ + case 1: /* Bound violation. */ + info = mpx_generate_siginfo(regs, xsave_buf); + if (PTR_ERR(info)) { + /* + * We failed to decode the MPX instruction. Act as if + * the exception was not caused by MPX. + */ + goto exit_trap; + } + /* + * Success, we decoded the instruction and retrieved + * an 'info' containing the address being accessed + * which caused the exception. This information + * allows and application to possibly handle the + * #BR exception itself. + */ + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info); + kfree(info); + break; + case 0: /* No exception caused by Intel MPX operations. */ + goto exit_trap; + default: + die("bounds", regs, error_code); + } + +exit: + exception_exit(prev_state); + return; +exit_trap: + /* + * This path out is for all the cases where we could not + * handle the exception in some way (like allocating a + * table or telling userspace about it. We will also end + * up here if the kernel has MPX turned off at compile + * time.. + */ + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); + exception_exit(prev_state); +} + dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 9009e094d68..96266375441 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -10,8 +10,12 @@ #include <linux/syscalls.h> #include <linux/sched/sysctl.h> +#include <asm/i387.h> +#include <asm/insn.h> #include <asm/mman.h> #include <asm/mpx.h> +#include <asm/processor.h> +#include <asm/fpu-internal.h> static const char *mpx_mapping_name(struct vm_area_struct *vma) { @@ -266,10 +270,11 @@ bad_opcode: siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, struct xsave_struct *xsave_buf) { + struct bndreg *bndregs, *bndreg; + siginfo_t *info = NULL; struct insn insn; uint8_t bndregno; int err; - siginfo_t *info; err = mpx_insn_decode(&insn, regs); if (err) @@ -285,6 +290,15 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, err = -EINVAL; goto err_out; } + /* get the bndregs _area_ of the xsave structure */ + bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS); + if (!bndregs) { + err = -EINVAL; + goto err_out; + } + /* now go select the individual register in the set of 4 */ + bndreg = &bndregs[bndregno]; + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { err = -ENOMEM; @@ -300,10 +314,8 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, * complains when casting from integers to different-size * pointers. */ - info->si_lower = (void __user *)(unsigned long) - (xsave_buf->bndreg[bndregno].lower_bound); - info->si_upper = (void __user *)(unsigned long) - (~xsave_buf->bndreg[bndregno].upper_bound); + info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound; + info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound; info->si_addr_lsb = 0; info->si_signo = SIGSEGV; info->si_errno = 0; @@ -319,5 +331,206 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, } return info; err_out: + /* info might be NULL, but kfree() handles that */ + kfree(info); return ERR_PTR(err); } + +static __user void *task_get_bounds_dir(struct task_struct *tsk) +{ + struct bndcsr *bndcsr; + + if (!cpu_feature_enabled(X86_FEATURE_MPX)) + return MPX_INVALID_BOUNDS_DIR; + + /* + * The bounds directory pointer is stored in a register + * only accessible if we first do an xsave. + */ + fpu_save_init(&tsk->thread.fpu); + bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR); + if (!bndcsr) + return MPX_INVALID_BOUNDS_DIR; + + /* + * Make sure the register looks valid by checking the + * enable bit. + */ + if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG)) + return MPX_INVALID_BOUNDS_DIR; + + /* + * Lastly, mask off the low bits used for configuration + * flags, and return the address of the bounds table. + */ + return (void __user *)(unsigned long) + (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); +} + +int mpx_enable_management(struct task_struct *tsk) +{ + void __user *bd_base = MPX_INVALID_BOUNDS_DIR; + struct mm_struct *mm = tsk->mm; + int ret = 0; + + /* + * runtime in the userspace will be responsible for allocation of + * the bounds directory. Then, it will save the base of the bounds + * directory into XSAVE/XRSTOR Save Area and enable MPX through + * XRSTOR instruction. + * + * fpu_xsave() is expected to be very expensive. Storing the bounds + * directory here means that we do not have to do xsave in the unmap + * path; we can just use mm->bd_addr instead. + */ + bd_base = task_get_bounds_dir(tsk); + down_write(&mm->mmap_sem); + mm->bd_addr = bd_base; + if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) + ret = -ENXIO; + + up_write(&mm->mmap_sem); + return ret; +} + +int mpx_disable_management(struct task_struct *tsk) +{ + struct mm_struct *mm = current->mm; + + if (!cpu_feature_enabled(X86_FEATURE_MPX)) + return -ENXIO; + + down_write(&mm->mmap_sem); + mm->bd_addr = MPX_INVALID_BOUNDS_DIR; + up_write(&mm->mmap_sem); + return 0; +} + +/* + * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each + * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, + * and the size of each bounds table is 4MB. + */ +static int allocate_bt(long __user *bd_entry) +{ + unsigned long expected_old_val = 0; + unsigned long actual_old_val = 0; + unsigned long bt_addr; + int ret = 0; + + /* + * Carve the virtual space out of userspace for the new + * bounds table: + */ + bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); + if (IS_ERR((void *)bt_addr)) + return PTR_ERR((void *)bt_addr); + /* + * Set the valid flag (kinda like _PAGE_PRESENT in a pte) + */ + bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG; + + /* + * Go poke the address of the new bounds table in to the + * bounds directory entry out in userspace memory. Note: + * we may race with another CPU instantiating the same table. + * In that case the cmpxchg will see an unexpected + * 'actual_old_val'. + * + * This can fault, but that's OK because we do not hold + * mmap_sem at this point, unlike some of the other part + * of the MPX code that have to pagefault_disable(). + */ + ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, + expected_old_val, bt_addr); + if (ret) + goto out_unmap; + + /* + * The user_atomic_cmpxchg_inatomic() will only return nonzero + * for faults, *not* if the cmpxchg itself fails. Now we must + * verify that the cmpxchg itself completed successfully. + */ + /* + * We expected an empty 'expected_old_val', but instead found + * an apparently valid entry. Assume we raced with another + * thread to instantiate this table and desclare succecss. + */ + if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) { + ret = 0; + goto out_unmap; + } + /* + * We found a non-empty bd_entry but it did not have the + * VALID_FLAG set. Return an error which will result in + * a SEGV since this probably means that somebody scribbled + * some invalid data in to a bounds table. + */ + if (expected_old_val != actual_old_val) { + ret = -EINVAL; + goto out_unmap; + } + return 0; +out_unmap: + vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); + return ret; +} + +/* + * When a BNDSTX instruction attempts to save bounds to a bounds + * table, it will first attempt to look up the table in the + * first-level bounds directory. If it does not find a table in + * the directory, a #BR is generated and we get here in order to + * allocate a new table. + * + * With 32-bit mode, the size of BD is 4MB, and the size of each + * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, + * and the size of each bound table is 4MB. + */ +static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) +{ + unsigned long bd_entry, bd_base; + struct bndcsr *bndcsr; + + bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + if (!bndcsr) + return -EINVAL; + /* + * Mask off the preserve and enable bits + */ + bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK; + /* + * The hardware provides the address of the missing or invalid + * entry via BNDSTATUS, so we don't have to go look it up. + */ + bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK; + /* + * Make sure the directory entry is within where we think + * the directory is. + */ + if ((bd_entry < bd_base) || + (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) + return -EINVAL; + + return allocate_bt((long __user *)bd_entry); +} + +int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +{ + /* + * Userspace never asked us to manage the bounds tables, + * so refuse to help. + */ + if (!kernel_managing_mpx_tables(current->mm)) + return -EINVAL; + + if (do_mpx_bt_fault(xsave_buf)) { + force_sig(SIGSEGV, current); + /* + * The force_sig() is essentially "handling" this + * exception, so we do not pass up the error + * from do_mpx_bt_fault(). + */ + } + return 0; +} diff --git a/fs/exec.c b/fs/exec.c index 7302b75a982..65d4f5c70ef 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -60,6 +60,7 @@ #include <asm/uaccess.h> #include <asm/mmu_context.h> #include <asm/tlb.h> +#include <asm/mpx.h> #include <trace/events/task.h> #include "internal.h" @@ -277,6 +278,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) goto err; mm->stack_vm = mm->total_vm = 1; + arch_bprm_mm_init(mm, vma); up_write(&mm->mmap_sem); bprm->p = vma->vm_end - sizeof(void *); return 0; diff --git a/include/asm-generic/mmu_context.h b/include/asm-generic/mmu_context.h index a7eec910ba6..1f2a8f9c926 100644 --- a/include/asm-generic/mmu_context.h +++ b/include/asm-generic/mmu_context.h @@ -42,4 +42,9 @@ static inline void activate_mm(struct mm_struct *prev_mm, { } +static inline void arch_bprm_mm_init(struct mm_struct *mm, + struct vm_area_struct *vma) +{ +} + #endif /* __ASM_GENERIC_MMU_CONTEXT_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6e0b286649f..004e9d17b47 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -454,6 +454,10 @@ struct mm_struct { bool tlb_flush_pending; #endif struct uprobes_state uprobes_state; +#ifdef CONFIG_X86_INTEL_MPX + /* address of the bounds directory */ + void __user *bd_addr; +#endif }; static inline void mm_init_cpumask(struct mm_struct *mm) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 513df75d0fc..89f63503f90 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -179,4 +179,10 @@ struct prctl_mm_map { #define PR_SET_THP_DISABLE 41 #define PR_GET_THP_DISABLE 42 +/* + * Tell the kernel to start/stop helping userspace manage bounds tables. + */ +#define PR_MPX_ENABLE_MANAGEMENT 43 +#define PR_MPX_DISABLE_MANAGEMENT 44 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 1eaa2f0b024..a8c9f5a7dda 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -91,6 +91,12 @@ #ifndef SET_TSC_CTL # define SET_TSC_CTL(a) (-EINVAL) #endif +#ifndef MPX_ENABLE_MANAGEMENT +# define MPX_ENABLE_MANAGEMENT(a) (-EINVAL) +#endif +#ifndef MPX_DISABLE_MANAGEMENT +# define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -2203,6 +2209,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, me->mm->def_flags &= ~VM_NOHUGEPAGE; up_write(&me->mm->mmap_sem); break; + case PR_MPX_ENABLE_MANAGEMENT: + error = MPX_ENABLE_MANAGEMENT(me); + break; + case PR_MPX_DISABLE_MANAGEMENT: + error = MPX_DISABLE_MANAGEMENT(me); + break; default: error = -EINVAL; break; |