From 61365e132ef987f7719af5d2e434db4465957637 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 7 Dec 2009 12:51:42 +0100 Subject: [S390] Improve address space check. A data access in access-register mode always is a user mode access, the code to inspect the access-registers can be removed. The second change is to use a different test to check for no-execute fault. The third change is to pass the translation exception identification as parameter, in theory the trans_exc_code in the lowcore could have been overwritten by the time the call to check_space from do_no_context is done. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 99 ++++++++++++++++++++++++---------------------------- 1 file changed, 45 insertions(+), 54 deletions(-) (limited to 'arch/s390/mm/fault.c') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6d507462967..3df5b918cfe 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -100,39 +100,28 @@ void bust_spinlocks(int yes) /* * Returns the address space associated with the fault. - * Returns 0 for kernel space, 1 for user space and - * 2 for code execution in user space with noexec=on. + * Returns 0 for kernel space and 1 for user space. */ -static inline int check_space(struct task_struct *tsk) +static inline int user_space_fault(unsigned long trans_exc_code) { /* - * The lowest two bits of S390_lowcore.trans_exc_code - * indicate which paging table was used. + * The lowest two bits of the translation exception + * identification indicate which paging table was used. */ - int desc = S390_lowcore.trans_exc_code & 3; - - if (desc == 3) /* Home Segment Table Descriptor */ - return switch_amode == 0; - if (desc == 2) /* Secondary Segment Table Descriptor */ - return tsk->thread.mm_segment.ar4; -#ifdef CONFIG_S390_SWITCH_AMODE - if (unlikely(desc == 1)) { /* STD determined via access register */ - /* %a0 always indicates primary space. */ - if (S390_lowcore.exc_access_id != 0) { - save_access_regs(tsk->thread.acrs); - /* - * An alet of 0 indicates primary space. - * An alet of 1 indicates secondary space. - * Any other alet values generate an - * alen-translation exception. - */ - if (tsk->thread.acrs[S390_lowcore.exc_access_id]) - return tsk->thread.mm_segment.ar4; - } - } -#endif - /* Primary Segment Table Descriptor */ - return switch_amode << s390_noexec; + trans_exc_code &= 3; + if (trans_exc_code == 2) + /* Access via secondary space, set_fs setting decides */ + return current->thread.mm_segment.ar4; + if (!switch_amode) + /* User space if the access has been done via home space. */ + return trans_exc_code == 3; + /* + * If the user space is not the home space the kernel runs in home + * space. Access via secondary space has already been covered, + * access via primary space or access register is from user space + * and access via home space is from the kernel. + */ + return trans_exc_code != 3; } /* @@ -162,9 +151,10 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, } static void do_no_context(struct pt_regs *regs, unsigned long error_code, - unsigned long address) + unsigned long trans_exc_code) { const struct exception_table_entry *fixup; + unsigned long address; /* Are we prepared to handle this kernel fault? */ fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); @@ -177,7 +167,8 @@ static void do_no_context(struct pt_regs *regs, unsigned long error_code, * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - if (check_space(current) == 0) + address = trans_exc_code & __FAIL_ADDR_MASK; + if (user_space_fault(trans_exc_code) == 0) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %p\n", (void *)address); else @@ -188,7 +179,8 @@ static void do_no_context(struct pt_regs *regs, unsigned long error_code, do_exit(SIGKILL); } -static void do_low_address(struct pt_regs *regs, unsigned long error_code) +static void do_low_address(struct pt_regs *regs, unsigned long error_code, + unsigned long trans_exc_code) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ @@ -198,11 +190,11 @@ static void do_low_address(struct pt_regs *regs, unsigned long error_code) do_exit(SIGKILL); } - do_no_context(regs, error_code, 0); + do_no_context(regs, error_code, trans_exc_code); } static void do_sigbus(struct pt_regs *regs, unsigned long error_code, - unsigned long address) + unsigned long trans_exc_code) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; @@ -212,13 +204,13 @@ static void do_sigbus(struct pt_regs *regs, unsigned long error_code, * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - tsk->thread.prot_addr = address; + tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; tsk->thread.trap_no = error_code; force_sig(SIGBUS, tsk); /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, error_code, address); + do_no_context(regs, error_code, trans_exc_code); } #ifdef CONFIG_S390_EXEC_PROTECT @@ -272,13 +264,13 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs, * 3b Region third trans. -> Not present (nullification) */ static inline void -do_exception(struct pt_regs *regs, unsigned long error_code, int write) +do_exception(struct pt_regs *regs, unsigned long error_code, int write, + unsigned long trans_exc_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int space; int si_code; int fault; @@ -288,18 +280,15 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) tsk = current; mm = tsk->mm; - /* get the failing address and the affected space */ - address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; - space = check_space(tsk); - /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ - if (unlikely(space == 0 || in_atomic() || !mm)) + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; + address = trans_exc_code & __FAIL_ADDR_MASK; /* * When we get here, the fault happened in the current * task's user address space, so we can switch on the @@ -315,7 +304,8 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) goto bad_area; #ifdef CONFIG_S390_EXEC_PROTECT - if (unlikely((space == 2) && !(vma->vm_flags & VM_EXEC))) + if (unlikely((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && + (trans_exc_code & 3) == 0 && !(vma->vm_flags & VM_EXEC))) if (!signal_return(mm, regs, address, error_code)) /* * signal_return() has done an up_read(&mm->mmap_sem) @@ -397,12 +387,14 @@ bad_area: } no_context: - do_no_context(regs, error_code, address); + do_no_context(regs, error_code, trans_exc_code); } void __kprobes do_protection_exception(struct pt_regs *regs, long error_code) { + unsigned long trans_exc_code = S390_lowcore.trans_exc_code; + /* Protection exception is supressing, decrement psw address. */ regs->psw.addr -= (error_code >> 16); /* @@ -410,31 +402,30 @@ void __kprobes do_protection_exception(struct pt_regs *regs, * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ - if (unlikely(!(S390_lowcore.trans_exc_code & 4))) { - do_low_address(regs, error_code); + if (unlikely(!(trans_exc_code & 4))) { + do_low_address(regs, error_code, trans_exc_code); return; } - do_exception(regs, 4, 1); + do_exception(regs, 4, 1, trans_exc_code); } void __kprobes do_dat_exception(struct pt_regs *regs, long error_code) { - do_exception(regs, error_code & 0xff, 0); + do_exception(regs, error_code & 0xff, 0, S390_lowcore.trans_exc_code); } #ifdef CONFIG_64BIT void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) { + unsigned long trans_exc_code = S390_lowcore.trans_exc_code; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int space; mm = current->mm; - address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; - space = check_space(current); + address = trans_exc_code & __FAIL_ADDR_MASK; - if (unlikely(space == 0 || in_atomic() || !mm)) + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; local_irq_enable(); @@ -457,7 +448,7 @@ void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) } no_context: - do_no_context(regs, error_code, address); + do_no_context(regs, error_code, trans_exc_code); } #endif -- cgit v1.2.3-70-g09d2 From b11b53342773361f3353b285eb6a3fd6074e7997 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 7 Dec 2009 12:51:43 +0100 Subject: [S390] Improve address space mode selection. Introduce user_mode to replace the two variables switch_amode and s390_noexec. There are three valid combinations of the old values: 1) switch_amode == 0 && s390_noexec == 0 2) switch_amode == 1 && s390_noexec == 0 3) switch_amode == 1 && s390_noexec == 1 They get replaced by 1) user_mode == HOME_SPACE_MODE 2) user_mode == PRIMARY_SPACE_MODE 3) user_mode == SECONDARY_SPACE_MODE The new kernel parameter user_mode=[primary,secondary,home] lets you choose the address space mode the user space processes should use. In addition the CONFIG_S390_SWITCH_AMODE config option is removed. Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 15 --------------- arch/s390/defconfig | 1 - arch/s390/include/asm/mmu_context.h | 4 ++-- arch/s390/include/asm/pgalloc.h | 3 ++- arch/s390/include/asm/setup.h | 17 ++++++----------- arch/s390/kernel/setup.c | 36 ++++++++++++++++++++---------------- arch/s390/kernel/vdso.c | 9 +++++---- arch/s390/kvm/Kconfig | 1 - arch/s390/lib/uaccess_mvcos.c | 4 ---- arch/s390/mm/fault.c | 4 ++-- arch/s390/mm/pgtable.c | 2 +- 11 files changed, 38 insertions(+), 58 deletions(-) (limited to 'arch/s390/mm/fault.c') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 16c673096a2..c80235206c0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -220,23 +220,8 @@ config AUDIT_ARCH bool default y -config S390_SWITCH_AMODE - bool "Switch kernel/user addressing modes" - help - This option allows to switch the addressing modes of kernel and user - space. The kernel parameter switch_amode=on will enable this feature, - default is disabled. Enabling this (via kernel parameter) on machines - earlier than IBM System z9-109 EC/BC will reduce system performance. - - Note that this option will also be selected by selecting the execute - protection option below. Enabling the execute protection via the - noexec kernel parameter will also switch the addressing modes, - independent of the switch_amode kernel parameter. - - config S390_EXEC_PROTECT bool "Data execute protection" - select S390_SWITCH_AMODE help This option allows to enable a buffer overflow protection for user space programs and it also selects the addressing mode option above. diff --git a/arch/s390/defconfig b/arch/s390/defconfig index ab4464486b7..f4e53c6708d 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -185,7 +185,6 @@ CONFIG_HOTPLUG_CPU=y CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_AUDIT_ARCH=y -CONFIG_S390_SWITCH_AMODE=y CONFIG_S390_EXEC_PROTECT=y # diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fc7edd6f41b..976e273988c 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -36,7 +36,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 1; mm->context.alloc_pgste = 1; } else { - mm->context.noexec = s390_noexec; + mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE); mm->context.has_pgste = 0; mm->context.alloc_pgste = 0; } @@ -58,7 +58,7 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) pgd_t *pgd = mm->pgd; S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); - if (switch_amode) { + if (user_mode != HOME_SPACE_MODE) { /* Load primary space page table origin. */ pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd; S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index ddad5903341..68940d0bad9 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -143,7 +143,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) spin_lock_init(&mm->context.list_lock); INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - return (pgd_t *) crst_table_alloc(mm, s390_noexec); + return (pgd_t *) + crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index e37478e8728..52a779c337e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -49,17 +49,12 @@ extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); -#ifdef CONFIG_S390_SWITCH_AMODE -extern unsigned int switch_amode; -#else -#define switch_amode (0) -#endif - -#ifdef CONFIG_S390_EXEC_PROTECT -extern unsigned int s390_noexec; -#else -#define s390_noexec (0) -#endif +#define PRIMARY_SPACE_MODE 0 +#define ACCESS_REGISTER_MODE 1 +#define SECONDARY_SPACE_MODE 2 +#define HOME_SPACE_MODE 3 + +extern unsigned int user_mode; /* * Machine features detected in head.S diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 061479ff029..0663287fa1b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,9 +305,8 @@ static int __init early_parse_mem(char *p) } early_param("mem", early_parse_mem); -#ifdef CONFIG_S390_SWITCH_AMODE -unsigned int switch_amode = 0; -EXPORT_SYMBOL_GPL(switch_amode); +unsigned int user_mode = HOME_SPACE_MODE; +EXPORT_SYMBOL_GPL(user_mode); static int set_amode_and_uaccess(unsigned long user_amode, unsigned long user32_amode) @@ -340,23 +339,29 @@ static int set_amode_and_uaccess(unsigned long user_amode, */ static int __init early_parse_switch_amode(char *p) { - switch_amode = 1; + if (user_mode != SECONDARY_SPACE_MODE) + user_mode = PRIMARY_SPACE_MODE; return 0; } early_param("switch_amode", early_parse_switch_amode); -#else /* CONFIG_S390_SWITCH_AMODE */ -static inline int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int __init early_parse_user_mode(char *p) { + if (p && strcmp(p, "primary") == 0) + user_mode = PRIMARY_SPACE_MODE; +#ifdef CONFIG_S390_EXEC_PROTECT + else if (p && strcmp(p, "secondary") == 0) + user_mode = SECONDARY_SPACE_MODE; +#endif + else if (!p || strcmp(p, "home") == 0) + user_mode = HOME_SPACE_MODE; + else + return 1; return 0; } -#endif /* CONFIG_S390_SWITCH_AMODE */ +early_param("user_mode", early_parse_user_mode); #ifdef CONFIG_S390_EXEC_PROTECT -unsigned int s390_noexec = 0; -EXPORT_SYMBOL_GPL(s390_noexec); - /* * Enable execute protection? */ @@ -364,8 +369,7 @@ static int __init early_parse_noexec(char *p) { if (!strncmp(p, "off", 3)) return 0; - switch_amode = 1; - s390_noexec = 1; + user_mode = SECONDARY_SPACE_MODE; return 0; } early_param("noexec", early_parse_noexec); @@ -373,7 +377,7 @@ early_param("noexec", early_parse_noexec); static void setup_addressing_mode(void) { - if (s390_noexec) { + if (user_mode == SECONDARY_SPACE_MODE) { if (set_amode_and_uaccess(PSW_ASC_SECONDARY, PSW32_ASC_SECONDARY)) pr_info("Execute protection active, " @@ -381,7 +385,7 @@ static void setup_addressing_mode(void) else pr_info("Execute protection active, " "mvcos not available\n"); - } else if (switch_amode) { + } else if (user_mode == PRIMARY_SPACE_MODE) { if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) pr_info("Address spaces switched, " "mvcos available\n"); @@ -411,7 +415,7 @@ setup_lowcore(void) lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - if (switch_amode) + if (user_mode != HOME_SPACE_MODE) lc->restart_psw.mask |= PSW_ASC_HOME; lc->external_new_psw.mask = psw_kernel_bits; lc->external_new_psw.addr = diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index adfb32aa6d5..5f99e66c51c 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -86,7 +86,8 @@ static void vdso_init_data(struct vdso_data *vd) unsigned int facility_list; facility_list = stfl(); - vd->ectg_available = switch_amode && (facility_list & 1); + vd->ectg_available = + user_mode != HOME_SPACE_MODE && (facility_list & 1); } #ifdef CONFIG_64BIT @@ -114,7 +115,7 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) lowcore->vdso_per_cpu_data = __LC_PASTE; - if (!switch_amode || !vdso_enabled) + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) return 0; segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); @@ -160,7 +161,7 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; - if (!switch_amode || !vdso_enabled) + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) return; psal = (u32 *)(addr_t) lowcore->paste[4]; @@ -184,7 +185,7 @@ static void __vdso_init_cr5(void *dummy) static void vdso_init_cr5(void) { - if (switch_amode && vdso_enabled) + if (user_mode != HOME_SPACE_MODE && vdso_enabled) on_each_cpu(__vdso_init_cr5, NULL, 1); } #endif /* CONFIG_64BIT */ diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index bf164fc2186..6ee55ae84ce 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -20,7 +20,6 @@ config KVM depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES - select S390_SWITCH_AMODE ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 58da3f46121..60455f104ea 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -162,7 +162,6 @@ static size_t clear_user_mvcos(size_t size, void __user *to) return size; } -#ifdef CONFIG_S390_SWITCH_AMODE static size_t strnlen_user_mvcos(size_t count, const char __user *src) { char buf[256]; @@ -200,7 +199,6 @@ static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, } while ((len_str == len) && (done < count)); return done; } -#endif /* CONFIG_S390_SWITCH_AMODE */ struct uaccess_ops uaccess_mvcos = { .copy_from_user = copy_from_user_mvcos_check, @@ -215,7 +213,6 @@ struct uaccess_ops uaccess_mvcos = { .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std, }; -#ifdef CONFIG_S390_SWITCH_AMODE struct uaccess_ops uaccess_mvcos_switch = { .copy_from_user = copy_from_user_mvcos, .copy_from_user_small = copy_from_user_mvcos, @@ -228,4 +225,3 @@ struct uaccess_ops uaccess_mvcos_switch = { .futex_atomic_op = futex_atomic_op_pt, .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, }; -#endif diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 3df5b918cfe..77108e34fc1 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -112,7 +112,7 @@ static inline int user_space_fault(unsigned long trans_exc_code) if (trans_exc_code == 2) /* Access via secondary space, set_fs setting decides */ return current->thread.mm_segment.ar4; - if (!switch_amode) + if (user_mode == HOME_SPACE_MODE) /* User space if the access has been done via home space. */ return trans_exc_code == 3; /* @@ -168,7 +168,7 @@ static void do_no_context(struct pt_regs *regs, unsigned long error_code, * terminate things with extreme prejudice. */ address = trans_exc_code & __FAIL_ADDR_MASK; - if (user_space_fault(trans_exc_code) == 0) + if (!user_space_fault(trans_exc_code)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %p\n", (void *)address); else diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2757c5616a0..ad621e06ada 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -269,7 +269,7 @@ int s390_enable_sie(void) struct mm_struct *mm, *old_mm; /* Do we have switched amode? If no, we cannot do sie */ - if (!switch_amode) + if (user_mode == HOME_SPACE_MODE) return -EINVAL; /* Do we have pgstes? if yes, we are done */ -- cgit v1.2.3-70-g09d2 From 7ecb344ae80bc03397ded3b004e06ecfe32becf9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 7 Dec 2009 12:51:44 +0100 Subject: [S390] Improve notify_page_fault implementation. notify_page_fault does a preempt_disable/preempt_enable for each fault generated by a kernel access to user space. If kprobes is not active that is unnecessary since the interrupts are not reenabled yet. To play safe repeat the kprobe_running check after preempt_disable(). Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/s390/mm/fault.c') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 77108e34fc1..fd72c269cdb 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -52,11 +52,11 @@ extern int sysctl_userprocess_debug; #endif -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, long err) +static inline int notify_page_fault(struct pt_regs *regs) { int ret = 0; +#ifdef CONFIG_KPROBES /* kprobe_running() needs smp_processor_id() */ if (!user_mode(regs)) { preempt_disable(); @@ -64,15 +64,9 @@ static inline int notify_page_fault(struct pt_regs *regs, long err) ret = 1; preempt_enable(); } - +#endif return ret; } -#else -static inline int notify_page_fault(struct pt_regs *regs, long err) -{ - return 0; -} -#endif /* @@ -274,7 +268,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write, int si_code; int fault; - if (notify_page_fault(regs, error_code)) + if (notify_page_fault(regs)) return; tsk = current; -- cgit v1.2.3-70-g09d2 From 50d7280d430484a890ddcadc7f738b5b6dd28bf1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 7 Dec 2009 12:51:45 +0100 Subject: [S390] fault handler performance optimization. Slim down the do_exception function to handle only the fast path of a fault and move the exceptional cases into a new function. That slightly increases the performance of the fault handling. Build fix for !CONFIG_COMPAT by Kamalesh Babulal Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 258 +++++++++++++++++++++++++-------------------------- 1 file changed, 129 insertions(+), 129 deletions(-) (limited to 'arch/s390/mm/fault.c') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index fd72c269cdb..0dcfcfb5b5b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -34,16 +34,15 @@ #include #include #include +#include #include "../kernel/entry.h" #ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 -#define __FIXUP_MASK 0x7fffffff #define __SUBCODE_MASK 0x0200 #define __PF_RES_FIELD 0ULL #else /* CONFIG_64BIT */ #define __FAIL_ADDR_MASK -4096L -#define __FIXUP_MASK ~0L #define __SUBCODE_MASK 0x0600 #define __PF_RES_FIELD 0x8000000000000000ULL #endif /* CONFIG_64BIT */ @@ -52,6 +51,10 @@ extern int sysctl_userprocess_debug; #endif +#define VM_FAULT_BADCONTEXT 0x010000 +#define VM_FAULT_BADMAP 0x020000 +#define VM_FAULT_BADACCESS 0x040000 + static inline int notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -122,18 +125,22 @@ static inline int user_space_fault(unsigned long trans_exc_code) * Send SIGSEGV to task. This is an external routine * to keep the stack usage of do_page_fault small. */ -static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, - int si_code, unsigned long address) +static noinline void do_sigsegv(struct pt_regs *regs, long int_code, + int si_code, unsigned long trans_exc_code) { struct siginfo si; + unsigned long address; + address = trans_exc_code & __FAIL_ADDR_MASK; + current->thread.prot_addr = address; + current->thread.trap_no = int_code; #if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) #if defined(CONFIG_SYSCTL) if (sysctl_userprocess_debug) #endif { printk("User process fault: interruption code 0x%lX\n", - error_code); + int_code); printk("failing address: %lX\n", address); show_regs(regs); } @@ -144,14 +151,14 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, force_sig_info(SIGSEGV, &si, current); } -static void do_no_context(struct pt_regs *regs, unsigned long error_code, - unsigned long trans_exc_code) +static noinline void do_no_context(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { const struct exception_table_entry *fixup; unsigned long address; /* Are we prepared to handle this kernel fault? */ - fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); + fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) { regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; return; @@ -169,107 +176,127 @@ static void do_no_context(struct pt_regs *regs, unsigned long error_code, printk(KERN_ALERT "Unable to handle kernel paging request" " at virtual user address %p\n", (void *)address); - die("Oops", regs, error_code); + die("Oops", regs, int_code); do_exit(SIGKILL); } -static void do_low_address(struct pt_regs *regs, unsigned long error_code, - unsigned long trans_exc_code) +static noinline void do_low_address(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. */ - die ("Low-address protection", regs, error_code); + die ("Low-address protection", regs, int_code); do_exit(SIGKILL); } - do_no_context(regs, error_code, trans_exc_code); + do_no_context(regs, int_code, trans_exc_code); } -static void do_sigbus(struct pt_regs *regs, unsigned long error_code, - unsigned long trans_exc_code) +static noinline void do_sigbus(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; - tsk->thread.trap_no = error_code; + tsk->thread.trap_no = int_code; force_sig(SIGBUS, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, error_code, trans_exc_code); } #ifdef CONFIG_S390_EXEC_PROTECT -static int signal_return(struct mm_struct *mm, struct pt_regs *regs, - unsigned long address, unsigned long error_code) +static noinline int signal_return(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { u16 instruction; int rc; -#ifdef CONFIG_COMPAT - int compat; -#endif - pagefault_disable(); rc = __get_user(instruction, (u16 __user *) regs->psw.addr); - pagefault_enable(); - if (rc) - return -EFAULT; - up_read(&mm->mmap_sem); - clear_tsk_thread_flag(current, TIF_SINGLE_STEP); -#ifdef CONFIG_COMPAT - compat = is_compat_task(); - if (compat && instruction == 0x0a77) - sys32_sigreturn(); - else if (compat && instruction == 0x0aad) - sys32_rt_sigreturn(); - else -#endif - if (instruction == 0x0a77) - sys_sigreturn(); - else if (instruction == 0x0aad) - sys_rt_sigreturn(); - else { - current->thread.prot_addr = address; - current->thread.trap_no = error_code; - do_sigsegv(regs, error_code, SEGV_MAPERR, address); - } + if (!rc && instruction == 0x0a77) { + clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + if (is_compat_task()) + sys32_sigreturn(); + else + sys_sigreturn(); + } else if (!rc && instruction == 0x0aad) { + clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + if (is_compat_task()) + sys32_rt_sigreturn(); + else + sys_rt_sigreturn(); + } else + do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); return 0; } #endif /* CONFIG_S390_EXEC_PROTECT */ +static noinline void do_fault_error(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code, int fault) +{ + int si_code; + + switch (fault) { + case VM_FAULT_BADACCESS: +#ifdef CONFIG_S390_EXEC_PROTECT + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && + (trans_exc_code & 3) == 0) { + signal_return(regs, int_code, trans_exc_code); + break; + } +#endif /* CONFIG_S390_EXEC_PROTECT */ + case VM_FAULT_BADMAP: + /* Bad memory access. Check if it is kernel or user space. */ + if (regs->psw.mask & PSW_MASK_PSTATE) { + /* User mode accesses just cause a SIGSEGV */ + si_code = (fault == VM_FAULT_BADMAP) ? + SEGV_MAPERR : SEGV_ACCERR; + do_sigsegv(regs, int_code, si_code, trans_exc_code); + return; + } + case VM_FAULT_BADCONTEXT: + do_no_context(regs, int_code, trans_exc_code); + break; + default: /* fault & VM_FAULT_ERROR */ + if (fault & VM_FAULT_OOM) + pagefault_out_of_memory(); + else if (fault & VM_FAULT_SIGBUS) { + do_sigbus(regs, int_code, trans_exc_code); + /* Kernel mode? Handle exceptions or die */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + do_no_context(regs, int_code, trans_exc_code); + } else + BUG(); + break; + } +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * - * error_code: + * interruption code (int_code): * 04 Protection -> Write-Protection (suprression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline void -do_exception(struct pt_regs *regs, unsigned long error_code, int write, - unsigned long trans_exc_code) +static inline int do_exception(struct pt_regs *regs, int write, + unsigned long trans_exc_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int si_code; int fault; if (notify_page_fault(regs)) - return; + return 0; tsk = current; mm = tsk->mm; @@ -279,8 +306,9 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write, * we are not in an interrupt and that there is a * user context. */ + fault = VM_FAULT_BADCONTEXT; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) - goto no_context; + goto out; address = trans_exc_code & __FAIL_ADDR_MASK; /* @@ -292,41 +320,35 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); - si_code = SEGV_MAPERR; + fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) - goto bad_area; + goto out_up; + if (unlikely(vma->vm_start > address)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_up; + if (expand_stack(vma, address)) + goto out_up; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ + fault = VM_FAULT_BADACCESS; #ifdef CONFIG_S390_EXEC_PROTECT if (unlikely((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && (trans_exc_code & 3) == 0 && !(vma->vm_flags & VM_EXEC))) - if (!signal_return(mm, regs, address, error_code)) - /* - * signal_return() has done an up_read(&mm->mmap_sem) - * if it returns 0. - */ - return; + goto out_up; #endif - - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - si_code = SEGV_ACCERR; if (!write) { /* page not present, check vm flags */ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; + goto out_up; } else { if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; + goto out_up; } if (is_vm_hugetlb_page(vma)) @@ -337,17 +359,9 @@ good_area: * the fault. */ fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) { - up_read(&mm->mmap_sem); - pagefault_out_of_memory(); - return; - } else if (fault & VM_FAULT_SIGBUS) { - do_sigbus(regs, error_code, address); - return; - } - BUG(); - } + if (unlikely(fault & VM_FAULT_ERROR)) + goto out_up; + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, @@ -357,67 +371,55 @@ good_area: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } - up_read(&mm->mmap_sem); /* * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: + fault = 0; +out_up: up_read(&mm->mmap_sem); - - /* User mode accesses just cause a SIGSEGV */ - if (regs->psw.mask & PSW_MASK_PSTATE) { - tsk->thread.prot_addr = address; - tsk->thread.trap_no = error_code; - do_sigsegv(regs, error_code, si_code, address); - return; - } - -no_context: - do_no_context(regs, error_code, trans_exc_code); +out: + return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs, - long error_code) +void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) { unsigned long trans_exc_code = S390_lowcore.trans_exc_code; + int fault; /* Protection exception is supressing, decrement psw address. */ - regs->psw.addr -= (error_code >> 16); + regs->psw.addr -= (int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ if (unlikely(!(trans_exc_code & 4))) { - do_low_address(regs, error_code, trans_exc_code); + do_low_address(regs, int_code, trans_exc_code); return; } - do_exception(regs, 4, 1, trans_exc_code); + fault = do_exception(regs, 1, trans_exc_code); + if (unlikely(fault)) + do_fault_error(regs, 4, trans_exc_code, fault); } -void __kprobes do_dat_exception(struct pt_regs *regs, long error_code) +void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) { - do_exception(regs, error_code & 0xff, 0, S390_lowcore.trans_exc_code); + unsigned long trans_exc_code = S390_lowcore.trans_exc_code; + int fault; + + fault = do_exception(regs, 0, trans_exc_code); + if (unlikely(fault)) + do_fault_error(regs, int_code & 255, trans_exc_code, fault); } #ifdef CONFIG_64BIT -void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) +void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) { unsigned long trans_exc_code = S390_lowcore.trans_exc_code; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long address; - - mm = current->mm; - address = trans_exc_code & __FAIL_ADDR_MASK; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; @@ -425,7 +427,7 @@ void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) local_irq_enable(); down_read(&mm->mmap_sem); - vma = find_vma(mm, address); + vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); up_read(&mm->mmap_sem); if (vma) { @@ -435,14 +437,12 @@ void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_MASK_PSTATE) { - current->thread.prot_addr = address; - current->thread.trap_no = error_code; - do_sigsegv(regs, error_code, SEGV_MAPERR, address); + do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); return; } no_context: - do_no_context(regs, error_code, trans_exc_code); + do_no_context(regs, int_code, trans_exc_code); } #endif @@ -507,7 +507,7 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } -static void pfault_interrupt(__u16 error_code) +static void pfault_interrupt(__u16 int_code) { struct task_struct *tsk; __u16 subcode; -- cgit v1.2.3-70-g09d2 From 1ab947de293f43812276b60cf9fa21127e7a5bb2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 7 Dec 2009 12:51:46 +0100 Subject: [S390] fault handler access flags check. Simplify the check of the vma->flags in do_exception for the different fault types. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'arch/s390/mm/fault.c') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 0dcfcfb5b5b..5a9e9a77dc1 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -286,7 +286,7 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline int do_exception(struct pt_regs *regs, int write, +static inline int do_exception(struct pt_regs *regs, int access, unsigned long trans_exc_code) { struct task_struct *tsk; @@ -337,19 +337,8 @@ static inline int do_exception(struct pt_regs *regs, int write, * we can handle it.. */ fault = VM_FAULT_BADACCESS; -#ifdef CONFIG_S390_EXEC_PROTECT - if (unlikely((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0 && !(vma->vm_flags & VM_EXEC))) + if (unlikely(!(vma->vm_flags & access))) goto out_up; -#endif - if (!write) { - /* page not present, check vm flags */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto out_up; - } else { - if (!(vma->vm_flags & VM_WRITE)) - goto out_up; - } if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; @@ -358,7 +347,8 @@ static inline int do_exception(struct pt_regs *regs, int write, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, + (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; @@ -399,7 +389,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) do_low_address(regs, int_code, trans_exc_code); return; } - fault = do_exception(regs, 1, trans_exc_code); + fault = do_exception(regs, VM_WRITE, trans_exc_code); if (unlikely(fault)) do_fault_error(regs, 4, trans_exc_code, fault); } @@ -407,9 +397,15 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) { unsigned long trans_exc_code = S390_lowcore.trans_exc_code; - int fault; + int access, fault; - fault = do_exception(regs, 0, trans_exc_code); + access = VM_READ | VM_EXEC | VM_WRITE; +#ifdef CONFIG_S390_EXEC_PROTECT + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && + (trans_exc_code & 3) == 0) + access = VM_EXEC; +#endif + fault = do_exception(regs, access, trans_exc_code); if (unlikely(fault)) do_fault_error(regs, int_code & 255, trans_exc_code, fault); } -- cgit v1.2.3-70-g09d2 From 6c1e3e79430615d0472dbf9f8fed89c571e66423 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 7 Dec 2009 12:51:47 +0100 Subject: [S390] Use do_exception() in pagetable walk usercopy functions. The pagetable walk usercopy functions have used a modified copy of the do_exception() function for fault handling. This lead to inconsistencies with recent changes to do_exception(), e.g. performance counters. This patch changes the pagetable walk usercopy code to call do_exception() directly, eliminating the redundancy. A new parameter is added to do_exception() to specify the fault address. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/uaccess.h | 2 + arch/s390/lib/uaccess_pt.c | 147 ++++++++++++++-------------------------- arch/s390/mm/fault.c | 23 +++++++ 3 files changed, 76 insertions(+), 96 deletions(-) (limited to 'arch/s390/mm/fault.c') diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 8377e91533d..cbf0a8745bf 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -93,6 +93,8 @@ extern struct uaccess_ops uaccess_mvcos; extern struct uaccess_ops uaccess_mvcos_switch; extern struct uaccess_ops uaccess_pt; +extern int __handle_fault(unsigned long, unsigned long, int); + static inline int __put_user_fn(size_t size, void __user *ptr, void *x) { size = uaccess.copy_to_user_small(size, ptr, x); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index cb5d59eab0e..404f2de296d 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -23,86 +23,21 @@ static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) pgd = pgd_offset(mm, addr); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return NULL; + return (pte_t *) 0x3a; pud = pud_offset(pgd, addr); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return NULL; + return (pte_t *) 0x3b; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return NULL; + return (pte_t *) 0x10; return pte_offset_map(pmd, addr); } -static int __handle_fault(struct mm_struct *mm, unsigned long address, - int write_access) -{ - struct vm_area_struct *vma; - int ret = -EFAULT; - int fault; - - if (in_atomic()) - return ret; - down_read(&mm->mmap_sem); - vma = find_vma(mm, address); - if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > address)) { - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto out; - if (expand_stack(vma, address)) - goto out; - } - - if (!write_access) { - /* page not present, check vm flags */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto out; - } else { - if (!(vma->vm_flags & VM_WRITE)) - goto out; - } - -survive: - fault = handle_mm_fault(mm, vma, address, write_access ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto out_sigbus; - BUG(); - } - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - ret = 0; -out: - up_read(&mm->mmap_sem); - return ret; - -out_of_memory: - up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", current->comm); - return ret; - -out_sigbus: - up_read(&mm->mmap_sem); - current->thread.prot_addr = address; - current->thread.trap_no = 0x11; - force_sig(SIGBUS, current); - return ret; -} - -static size_t __user_copy_pt(unsigned long uaddr, void *kptr, - size_t n, int write_user) +static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, + size_t n, int write_user) { struct mm_struct *mm = current->mm; unsigned long offset, pfn, done, size; @@ -114,12 +49,17 @@ retry: spin_lock(&mm->page_table_lock); do { pte = follow_table(mm, uaddr); - if (!pte || !pte_present(*pte) || - (write_user && !pte_write(*pte))) + if ((unsigned long) pte < 0x1000) goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; + goto fault; + } else if (write_user && !pte_write(*pte)) { + pte = (pte_t *) 0x04; + goto fault; + } pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE - 1); size = min(n - done, PAGE_SIZE - offset); if (write_user) { @@ -137,7 +77,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(mm, uaddr, write_user)) + if (__handle_fault(uaddr, (unsigned long) pte, write_user)) return n - done; goto retry; } @@ -146,30 +86,31 @@ fault: * Do DAT for user address by page table walk, return kernel address. * This function needs to be called with current->mm->page_table_lock held. */ -static unsigned long __dat_user_addr(unsigned long uaddr) +static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) { struct mm_struct *mm = current->mm; - unsigned long pfn, ret; + unsigned long pfn; pte_t *pte; int rc; - ret = 0; retry: pte = follow_table(mm, uaddr); - if (!pte || !pte_present(*pte)) + if ((unsigned long) pte < 0x1000) goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; + goto fault; + } pfn = pte_pfn(*pte); - ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); -out: - return ret; + return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); fault: spin_unlock(&mm->page_table_lock); - rc = __handle_fault(mm, uaddr, 0); + rc = __handle_fault(uaddr, (unsigned long) pte, 0); spin_lock(&mm->page_table_lock); - if (rc) - goto out; - goto retry; + if (!rc) + goto retry; + return 0; } size_t copy_from_user_pt(size_t n, const void __user *from, void *to) @@ -234,8 +175,12 @@ retry: spin_lock(&mm->page_table_lock); do { pte = follow_table(mm, uaddr); - if (!pte || !pte_present(*pte)) + if ((unsigned long) pte < 0x1000) + goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; goto fault; + } pfn = pte_pfn(*pte); offset = uaddr & (PAGE_SIZE-1); @@ -249,9 +194,8 @@ retry: return done + 1; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(mm, uaddr, 0)) { + if (__handle_fault(uaddr, (unsigned long) pte, 0)) return 0; - } goto retry; } @@ -284,7 +228,7 @@ static size_t copy_in_user_pt(size_t n, void __user *to, { struct mm_struct *mm = current->mm; unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, - uaddr, done, size; + uaddr, done, size, error_code; unsigned long uaddr_from = (unsigned long) from; unsigned long uaddr_to = (unsigned long) to; pte_t *pte_from, *pte_to; @@ -298,17 +242,28 @@ static size_t copy_in_user_pt(size_t n, void __user *to, retry: spin_lock(&mm->page_table_lock); do { + write_user = 0; + uaddr = uaddr_from; pte_from = follow_table(mm, uaddr_from); - if (!pte_from || !pte_present(*pte_from)) { - uaddr = uaddr_from; - write_user = 0; + error_code = (unsigned long) pte_from; + if (error_code < 0x1000) + goto fault; + if (!pte_present(*pte_from)) { + error_code = 0x11; goto fault; } + write_user = 1; + uaddr = uaddr_to; pte_to = follow_table(mm, uaddr_to); - if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) { - uaddr = uaddr_to; - write_user = 1; + error_code = (unsigned long) pte_to; + if (error_code < 0x1000) + goto fault; + if (!pte_present(*pte_to)) { + error_code = 0x11; + goto fault; + } else if (!pte_write(*pte_to)) { + error_code = 0x04; goto fault; } @@ -329,7 +284,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(mm, uaddr, write_user)) + if (__handle_fault(uaddr, error_code, write_user)) return n - done; goto retry; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 5a9e9a77dc1..fc102e70d9c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -442,6 +442,29 @@ no_context: } #endif +int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) +{ + struct pt_regs regs; + int access, fault; + + regs.psw.mask = psw_kernel_bits; + if (!irqs_disabled()) + regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.addr = (unsigned long) __builtin_return_address(0); + regs.psw.addr |= PSW_ADDR_AMODE; + uaddr &= PAGE_MASK; + access = write_user ? VM_WRITE : VM_READ; + fault = do_exception(®s, access, uaddr | 2); + if (unlikely(fault)) { + if (fault & VM_FAULT_OOM) { + pagefault_out_of_memory(); + fault = 0; + } else if (fault & VM_FAULT_SIGBUS) + do_sigbus(®s, int_code, uaddr); + } + return fault ? -EFAULT : 0; +} + #ifdef CONFIG_PFAULT /* * 'pfault' pseudo page faults routines. -- cgit v1.2.3-70-g09d2