diff options
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/Kconfig | 25 | ||||
-rw-r--r-- | arch/s390/defconfig | 2 | ||||
-rw-r--r-- | arch/s390/kernel/compat_linux.c | 6 | ||||
-rw-r--r-- | arch/s390/kernel/compat_linux.h | 31 | ||||
-rw-r--r-- | arch/s390/kernel/compat_signal.c | 2 | ||||
-rw-r--r-- | arch/s390/kernel/ipl.c | 4 | ||||
-rw-r--r-- | arch/s390/kernel/process.c | 4 | ||||
-rw-r--r-- | arch/s390/kernel/ptrace.c | 10 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 98 | ||||
-rw-r--r-- | arch/s390/kernel/signal.c | 2 | ||||
-rw-r--r-- | arch/s390/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/s390/lib/uaccess_mvcos.c | 53 | ||||
-rw-r--r-- | arch/s390/lib/uaccess_pt.c | 320 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 88 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 6 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 14 |
16 files changed, 606 insertions, 61 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 12272361c01..5c7e981c115 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -134,6 +134,31 @@ config AUDIT_ARCH bool default y +config S390_SWITCH_AMODE + bool "Switch kernel/user addressing modes" + help + This option allows to switch the addressing modes of kernel and user + space. The kernel parameter switch_amode=on will enable this feature, + default is disabled. Enabling this (via kernel parameter) on machines + earlier than IBM System z9-109 EC/BC will reduce system performance. + + Note that this option will also be selected by selecting the execute + protection option below. Enabling the execute protection via the + noexec kernel parameter will also switch the addressing modes, + independent of the switch_amode kernel parameter. + + +config S390_EXEC_PROTECT + bool "Data execute protection" + select S390_SWITCH_AMODE + help + This option allows to enable a buffer overflow protection for user + space programs and it also selects the addressing mode option above. + The kernel parameter noexec=on will enable this feature and also + switch the addressing modes, default is disabled. Enabling this (via + kernel parameter) on machines earlier than IBM System z9-109 EC/BC + will reduce system performance. + comment "Code generation options" choice diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 46bb38515b0..12eb97f9c1d 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -108,6 +108,8 @@ CONFIG_DEFAULT_MIGRATION_COST=1000000 CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_AUDIT_ARCH=y +CONFIG_S390_SWITCH_AMODE=y +CONFIG_S390_EXEC_PROTECT=y # # Code generation options diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index cf84d697dae..666bb6daa14 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -69,6 +69,12 @@ #include "compat_linux.h" +long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_DEFAULT_KEY); +long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | + PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE); /* For this source file, we want overflow handling. */ diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 1a18e29668e..e89f8c0c42a 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -115,37 +115,6 @@ typedef struct __u32 addr; } _psw_t32 __attribute__ ((aligned(8))); -#define PSW32_MASK_PER 0x40000000UL -#define PSW32_MASK_DAT 0x04000000UL -#define PSW32_MASK_IO 0x02000000UL -#define PSW32_MASK_EXT 0x01000000UL -#define PSW32_MASK_KEY 0x00F00000UL -#define PSW32_MASK_MCHECK 0x00040000UL -#define PSW32_MASK_WAIT 0x00020000UL -#define PSW32_MASK_PSTATE 0x00010000UL -#define PSW32_MASK_ASC 0x0000C000UL -#define PSW32_MASK_CC 0x00003000UL -#define PSW32_MASK_PM 0x00000f00UL - -#define PSW32_ADDR_AMODE31 0x80000000UL -#define PSW32_ADDR_INSN 0x7FFFFFFFUL - -#define PSW32_BASE_BITS 0x00080000UL - -#define PSW32_ASC_PRIMARY 0x00000000UL -#define PSW32_ASC_ACCREG 0x00004000UL -#define PSW32_ASC_SECONDARY 0x00008000UL -#define PSW32_ASC_HOME 0x0000C000UL - -#define PSW32_USER_BITS (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | \ - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | \ - PSW32_MASK_PSTATE) - -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - - typedef struct { _psw_t32 psw; diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 8d17b2ab6f2..887a9881d0d 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -298,7 +298,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) _s390_regs_common32 regs32; int err, i; - regs32.psw.mask = PSW32_MASK_MERGE(PSW32_USER_BITS, + regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, (__u32)(regs->psw.mask >> 32)); regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; for (i = 0; i < NUM_GPRS; i++) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 9e9972e8a52..2c91226e1d4 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1016,12 +1016,12 @@ void s390_reset_system(void) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) &reset_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) &reset_pgm_handler; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 6603fbb41d0..5acfac654f9 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -144,7 +144,7 @@ static void default_idle(void) trace_hardirqs_on(); /* Wait for external, I/O or machine check interrupt. */ - __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT | + __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT); } @@ -190,7 +190,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; regs.gprs[9] = (unsigned long) fn; regs.gprs[10] = (unsigned long) arg; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 29fde70090f..2a8f0872ea8 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -230,9 +230,9 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) */ if (addr == (addr_t) &dummy->regs.psw.mask && #ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(PSW_USER32_BITS, data) && + data != PSW_MASK_MERGE(psw_user32_bits, data) && #endif - data != PSW_MASK_MERGE(PSW_USER_BITS, data)) + data != PSW_MASK_MERGE(psw_user_bits, data)) /* Invalid psw mask. */ return -EINVAL; #ifndef CONFIG_64BIT @@ -393,7 +393,7 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data) if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(PSW32_USER_BITS, tmp); + tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ tmp = (__u32) task_pt_regs(child)->psw.addr | @@ -468,11 +468,11 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data) */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(PSW32_USER_BITS, tmp)) + if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) /* Invalid psw mask. */ return -EINVAL; task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(PSW_USER32_BITS, (__u64) tmp << 32); + PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ task_pt_regs(child)->psw.addr = diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 25bf7277d31..b1b9a931237 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -50,6 +50,13 @@ #include <asm/page.h> #include <asm/ptrace.h> #include <asm/sections.h> +#include <asm/compat.h> + +long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | + PSW_MASK_MCHECK | PSW_DEFAULT_KEY); +long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_DEFAULT_KEY); /* * User copy operations. @@ -383,6 +390,84 @@ static int __init early_parse_ipldelay(char *p) } early_param("ipldelay", early_parse_ipldelay); +#ifdef CONFIG_S390_SWITCH_AMODE +unsigned int switch_amode = 0; +EXPORT_SYMBOL_GPL(switch_amode); + +static inline void set_amode_and_uaccess(unsigned long user_amode, + unsigned long user32_amode) +{ + psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_DEFAULT_KEY; +#ifdef CONFIG_COMPAT + psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_DEFAULT_KEY; + psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | + PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE; +#endif + psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | + PSW_MASK_MCHECK | PSW_DEFAULT_KEY; + + if (MACHINE_HAS_MVCOS) { + printk("mvcos available.\n"); + memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); + } else { + printk("mvcos not available.\n"); + memcpy(&uaccess, &uaccess_pt, sizeof(uaccess)); + } +} + +/* + * Switch kernel/user addressing modes? + */ +static int __init early_parse_switch_amode(char *p) +{ + switch_amode = 1; + return 0; +} +early_param("switch_amode", early_parse_switch_amode); + +#else /* CONFIG_S390_SWITCH_AMODE */ +static inline void set_amode_and_uaccess(unsigned long user_amode, + unsigned long user32_amode) +{ +} +#endif /* CONFIG_S390_SWITCH_AMODE */ + +#ifdef CONFIG_S390_EXEC_PROTECT +unsigned int s390_noexec = 0; +EXPORT_SYMBOL_GPL(s390_noexec); + +/* + * Enable execute protection? + */ +static int __init early_parse_noexec(char *p) +{ + if (!strncmp(p, "off", 3)) + return 0; + switch_amode = 1; + s390_noexec = 1; + return 0; +} +early_param("noexec", early_parse_noexec); +#endif /* CONFIG_S390_EXEC_PROTECT */ + +static void setup_addressing_mode(void) +{ + if (s390_noexec) { + printk("S390 execute protection active, "); + set_amode_and_uaccess(PSW_ASC_SECONDARY, PSW32_ASC_SECONDARY); + return; + } + if (switch_amode) { + printk("S390 address spaces switched, "); + set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY); + } +} + static void __init setup_lowcore(void) { @@ -399,19 +484,21 @@ setup_lowcore(void) lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS; + if (switch_amode) + lc->restart_psw.mask |= PSW_ASC_HOME; + lc->external_new_psw.mask = psw_kernel_bits; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS; + lc->program_new_psw.mask = psw_kernel_bits; lc->program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; lc->mcck_new_psw.mask = - PSW_KERNEL_BITS & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS; + lc->io_new_psw.mask = psw_kernel_bits; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->ipl_device = S390_lowcore.ipl_device; lc->jiffy_timer = -1LL; @@ -645,6 +732,7 @@ setup_arch(char **cmdline_p) parse_early_param(); setup_memory_end(); + setup_addressing_mode(); setup_memory(); setup_resources(); setup_lowcore(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 4c8a7954ef4..554f9cf7499 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -119,7 +119,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(PSW_USER_BITS, regs->psw.mask); + user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3cb7e103207..cb155d9fd74 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -244,7 +244,7 @@ static inline void do_wait_for_stop(void) void smp_send_stop(void) { /* Disable all interrupts/machine checks */ - __load_psw_mask(PSW_KERNEL_BITS & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); /* write magic number to zero page (absolute 0) */ lowcore_ptr[smp_processor_id()]->panic_magic = __PANIC_MAGIC; diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 78c48f88f5f..6d8772339d7 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -162,6 +162,44 @@ static size_t clear_user_mvcos(size_t size, void __user *to) return size; } +static size_t strnlen_user_mvcos(size_t count, const char __user *src) +{ + char buf[256]; + int rc; + size_t done, len, len_str; + + done = 0; + do { + len = min(count - done, (size_t) 256); + rc = uaccess.copy_from_user(len, src + done, buf); + if (unlikely(rc == len)) + return 0; + len -= rc; + len_str = strnlen(buf, len); + done += len_str; + } while ((len_str == len) && (done < count)); + return done + 1; +} + +static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, + char *dst) +{ + int rc; + size_t done, len, len_str; + + done = 0; + do { + len = min(count - done, (size_t) 4096); + rc = uaccess.copy_from_user(len, src + done, dst); + if (unlikely(rc == len)) + return -EFAULT; + len -= rc; + len_str = strnlen(dst, len); + done += len_str; + } while ((len_str == len) && (done < count)); + return done; +} + struct uaccess_ops uaccess_mvcos = { .copy_from_user = copy_from_user_mvcos_check, .copy_from_user_small = copy_from_user_std, @@ -174,3 +212,18 @@ struct uaccess_ops uaccess_mvcos = { .futex_atomic_op = futex_atomic_op_std, .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std, }; + +#ifdef CONFIG_S390_SWITCH_AMODE +struct uaccess_ops uaccess_mvcos_switch = { + .copy_from_user = copy_from_user_mvcos, + .copy_from_user_small = copy_from_user_mvcos, + .copy_to_user = copy_to_user_mvcos, + .copy_to_user_small = copy_to_user_mvcos, + .copy_in_user = copy_in_user_mvcos, + .clear_user = clear_user_mvcos, + .strnlen_user = strnlen_user_mvcos, + .strncpy_from_user = strncpy_from_user_mvcos, + .futex_atomic_op = futex_atomic_op_pt, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, +}; +#endif diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 24ead559c7b..637192fa7c9 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -1,7 +1,8 @@ /* * arch/s390/lib/uaccess_pt.c * - * User access functions based on page table walks. + * User access functions based on page table walks for enhanced + * system layout without hardware support. * * Copyright IBM Corp. 2006 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) @@ -134,6 +135,49 @@ fault: goto retry; } +/* + * Do DAT for user address by page table walk, return kernel address. + * This function needs to be called with current->mm->page_table_lock held. + */ +static inline unsigned long __dat_user_addr(unsigned long uaddr) +{ + struct mm_struct *mm = current->mm; + unsigned long pfn, ret; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int rc; + + ret = 0; +retry: + pgd = pgd_offset(mm, uaddr); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto fault; + + pmd = pmd_offset(pgd, uaddr); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto fault; + + pte = pte_offset_map(pmd, uaddr); + if (!pte || !pte_present(*pte)) + goto fault; + + pfn = pte_pfn(*pte); + if (!pfn_valid(pfn)) + goto out; + + ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); +out: + return ret; +fault: + spin_unlock(&mm->page_table_lock); + rc = __handle_fault(mm, uaddr, 0); + spin_lock(&mm->page_table_lock); + if (rc) + goto out; + goto retry; +} + size_t copy_from_user_pt(size_t n, const void __user *from, void *to) { size_t rc; @@ -156,3 +200,277 @@ size_t copy_to_user_pt(size_t n, void __user *to, const void *from) } return __user_copy_pt((unsigned long) to, (void *) from, n, 1); } + +static size_t clear_user_pt(size_t n, void __user *to) +{ + long done, size, ret; + + if (segment_eq(get_fs(), KERNEL_DS)) { + memset((void __kernel __force *) to, 0, n); + return 0; + } + done = 0; + do { + if (n - done > PAGE_SIZE) + size = PAGE_SIZE; + else + size = n - done; + ret = __user_copy_pt((unsigned long) to + done, + &empty_zero_page, size, 1); + done += size; + if (ret) + return ret + n - done; + } while (done < n); + return 0; +} + +static size_t strnlen_user_pt(size_t count, const char __user *src) +{ + char *addr; + unsigned long uaddr = (unsigned long) src; + struct mm_struct *mm = current->mm; + unsigned long offset, pfn, done, len; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + size_t len_str; + + if (segment_eq(get_fs(), KERNEL_DS)) + return strnlen((const char __kernel __force *) src, count) + 1; + done = 0; +retry: + spin_lock(&mm->page_table_lock); + do { + pgd = pgd_offset(mm, uaddr); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto fault; + + pmd = pmd_offset(pgd, uaddr); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto fault; + + pte = pte_offset_map(pmd, uaddr); + if (!pte || !pte_present(*pte)) + goto fault; + + pfn = pte_pfn(*pte); + if (!pfn_valid(pfn)) { + done = -1; + goto out; + } + + offset = uaddr & (PAGE_SIZE-1); + addr = (char *)(pfn << PAGE_SHIFT) + offset; + len = min(count - done, PAGE_SIZE - offset); + len_str = strnlen(addr, len); + done += len_str; + uaddr += len_str; + } while ((len_str == len) && (done < count)); +out: + spin_unlock(&mm->page_table_lock); + return done + 1; +fault: + spin_unlock(&mm->page_table_lock); + if (__handle_fault(mm, uaddr, 0)) { + return 0; + } + goto retry; +} + +static size_t strncpy_from_user_pt(size_t count, const char __user *src, + char *dst) +{ + size_t n = strnlen_user_pt(count, src); + + if (!n) + return -EFAULT; + if (n > count) + n = count; + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy(dst, (const char __kernel __force *) src, n); + if (dst[n-1] == '\0') + return n-1; + else + return n; + } + if (__user_copy_pt((unsigned long) src, dst, n, 0)) + return -EFAULT; + if (dst[n-1] == '\0') + return n-1; + else + return n; +} + +static size_t copy_in_user_pt(size_t n, void __user *to, + const void __user *from) +{ + struct mm_struct *mm = current->mm; + unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, + uaddr, done, size; + unsigned long uaddr_from = (unsigned long) from; + unsigned long uaddr_to = (unsigned long) to; + pgd_t *pgd_from, *pgd_to; + pmd_t *pmd_from, *pmd_to; + pte_t *pte_from, *pte_to; + int write_user; + + done = 0; +retry: + spin_lock(&mm->page_table_lock); + do { + pgd_from = pgd_offset(mm, uaddr_from); + if (pgd_none(*pgd_from) || unlikely(pgd_bad(*pgd_from))) { + uaddr = uaddr_from; + write_user = 0; + goto fault; + } + pgd_to = pgd_offset(mm, uaddr_to); + if (pgd_none(*pgd_to) || unlikely(pgd_bad(*pgd_to))) { + uaddr = uaddr_to; + write_user = 1; + goto fault; + } + + pmd_from = pmd_offset(pgd_from, uaddr_from); + if (pmd_none(*pmd_from) || unlikely(pmd_bad(*pmd_from))) { + uaddr = uaddr_from; + write_user = 0; + goto fault; + } + pmd_to = pmd_offset(pgd_to, uaddr_to); + if (pmd_none(*pmd_to) || unlikely(pmd_bad(*pmd_to))) { + uaddr = uaddr_to; + write_user = 1; + goto fault; + } + + pte_from = pte_offset_map(pmd_from, uaddr_from); + if (!pte_from || !pte_present(*pte_from)) { + uaddr = uaddr_from; + write_user = 0; + goto fault; + } + pte_to = pte_offset_map(pmd_to, uaddr_to); + if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) { + uaddr = uaddr_to; + write_user = 1; + goto fault; + } + + pfn_from = pte_pfn(*pte_from); + if (!pfn_valid(pfn_from)) + goto out; + pfn_to = pte_pfn(*pte_to); + if (!pfn_valid(pfn_to)) + goto out; + + offset_from = uaddr_from & (PAGE_SIZE-1); + offset_to = uaddr_from & (PAGE_SIZE-1); + offset_max = max(offset_from, offset_to); + size = min(n - done, PAGE_SIZE - offset_max); + + memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, + (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); + done += size; + uaddr_from += size; + uaddr_to += size; + } while (done < n); +out: + spin_unlock(&mm->page_table_lock); + return n - done; +fault: + spin_unlock(&mm->page_table_lock); + if (__handle_fault(mm, uaddr, write_user)) + return n - done; + goto retry; +} + +#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ + asm volatile("0: l %1,0(%6)\n" \ + "1: " insn \ + "2: cs %1,%2,0(%6)\n" \ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4:\n" \ + EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ + "m" (*uaddr) : "cc" ); + +int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) +{ + int oldval = 0, newval, ret; + + spin_lock(¤t->mm->page_table_lock); + uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + if (!uaddr) { + spin_unlock(¤t->mm->page_table_lock); + return -EFAULT; + } + get_page(virt_to_page(uaddr)); + spin_unlock(¤t->mm->page_table_lock); + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("lr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("lr %2,%1\nar %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("lr %2,%1\nor %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("lr %2,%1\nnr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("lr %2,%1\nxr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + put_page(virt_to_page(uaddr)); + *old = oldval; + return ret; +} + +int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) +{ + int ret; + + spin_lock(¤t->mm->page_table_lock); + uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + if (!uaddr) { + spin_unlock(¤t->mm->page_table_lock); + return -EFAULT; + } + get_page(virt_to_page(uaddr)); + spin_unlock(¤t->mm->page_table_lock); + asm volatile(" cs %1,%4,0(%5)\n" + "0: lr %0,%1\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (ret), "+d" (oldval), "=m" (*uaddr) + : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + : "cc", "memory" ); + put_page(virt_to_page(uaddr)); + return ret; +} + +struct uaccess_ops uaccess_pt = { + .copy_from_user = copy_from_user_pt, + .copy_from_user_small = copy_from_user_pt, + .copy_to_user = copy_to_user_pt, + .copy_to_user_small = copy_to_user_pt, + .copy_in_user = copy_in_user_pt, + .clear_user = clear_user_pt, + .strnlen_user = strnlen_user_pt, + .strncpy_from_user = strncpy_from_user_pt, + .futex_atomic_op = futex_atomic_op_pt, + .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, +}; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 3382e29f34a..9ff143e8774 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -137,7 +137,9 @@ static int __check_access_register(struct pt_regs *regs, int error_code) /* * Check which address space the address belongs to. - * Returns 1 for user space and 0 for kernel space. + * May return 1 or 2 for user space and 0 for kernel space. + * Returns 2 for user space in primary addressing mode with + * CONFIG_S390_EXEC_PROTECT on and kernel parameter noexec=on. */ static inline int check_user_space(struct pt_regs *regs, int error_code) { @@ -154,7 +156,7 @@ static inline int check_user_space(struct pt_regs *regs, int error_code) return __check_access_register(regs, error_code); if (descriptor == 2) return current->thread.mm_segment.ar4; - return descriptor != 0; + return ((descriptor != 0) ^ (switch_amode)) << s390_noexec; } /* @@ -183,6 +185,77 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, force_sig_info(SIGSEGV, &si, current); } +#ifdef CONFIG_S390_EXEC_PROTECT +extern long sys_sigreturn(struct pt_regs *regs); +extern long sys_rt_sigreturn(struct pt_regs *regs); +extern long sys32_sigreturn(struct pt_regs *regs); +extern long sys32_rt_sigreturn(struct pt_regs *regs); + +static inline void do_sigreturn(struct mm_struct *mm, struct pt_regs *regs, + int rt) +{ + up_read(&mm->mmap_sem); + clear_tsk_thread_flag(current, TIF_SINGLE_STEP); +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(current, TIF_31BIT)) { + if (rt) + sys32_rt_sigreturn(regs); + else + sys32_sigreturn(regs); + return; + } +#endif /* CONFIG_COMPAT */ + if (rt) + sys_rt_sigreturn(regs); + else + sys_sigreturn(regs); + return; +} + +static int signal_return(struct mm_struct *mm, struct pt_regs *regs, + unsigned long address, unsigned long error_code) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + u16 *instruction; + unsigned long pfn, uaddr = regs->psw.addr; + + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, uaddr); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto out_fault; + pmd = pmd_offset(pgd, uaddr); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto out_fault; + pte = pte_offset_map(pmd_offset(pgd_offset(mm, uaddr), uaddr), uaddr); + if (!pte || !pte_present(*pte)) + goto out_fault; + pfn = pte_pfn(*pte); + if (!pfn_valid(pfn)) + goto out_fault; + spin_unlock(&mm->page_table_lock); + + instruction = (u16 *) ((pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE-1))); + if (*instruction == 0x0a77) + do_sigreturn(mm, regs, 0); + else if (*instruction == 0x0aad) + do_sigreturn(mm, regs, 1); + else { + printk("- XXX - do_exception: task = %s, primary, NO EXEC " + "-> SIGSEGV\n", current->comm); + up_read(&mm->mmap_sem); + current->thread.prot_addr = address; + current->thread.trap_no = error_code; + do_sigsegv(regs, error_code, SEGV_MAPERR, address); + } + return 0; +out_fault: + spin_unlock(&mm->page_table_lock); + return -EFAULT; +} +#endif /* CONFIG_S390_EXEC_PROTECT */ + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -260,6 +333,17 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) vma = find_vma(mm, address); if (!vma) goto bad_area; + +#ifdef CONFIG_S390_EXEC_PROTECT + if (unlikely((user_address == 2) && !(vma->vm_flags & VM_EXEC))) + if (!signal_return(mm, regs, address, error_code)) + /* + * signal_return() has done an up_read(&mm->mmap_sem) + * if it returns 0. + */ + return; +#endif + if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0e7e9acab9e..162a338a557 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -104,7 +104,7 @@ static void __init setup_ro_region(void) pmd = pmd_offset(pgd, address); pte = pte_offset_kernel(pmd, address); new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO)); - set_pte(pte, new_pte); + *pte = new_pte; } } @@ -124,11 +124,11 @@ void __init paging_init(void) #ifdef CONFIG_64BIT pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE; for (i = 0; i < PTRS_PER_PGD; i++) - pgd_clear(pg_dir + i); + pgd_clear_kernel(pg_dir + i); #else pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; for (i = 0; i < PTRS_PER_PGD; i++) - pmd_clear((pmd_t *)(pg_dir + i)); + pmd_clear_kernel((pmd_t *)(pg_dir + i)); #endif vmem_map_init(); setup_ro_region(); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index cd3d93e8c21..92a56519002 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -82,7 +82,7 @@ static inline pmd_t *vmem_pmd_alloc(void) if (!pmd) return NULL; for (i = 0; i < PTRS_PER_PMD; i++) - pmd_clear(pmd + i); + pmd_clear_kernel(pmd + i); return pmd; } @@ -97,7 +97,7 @@ static inline pte_t *vmem_pte_alloc(void) return NULL; pte_val(empty_pte) = _PAGE_TYPE_EMPTY; for (i = 0; i < PTRS_PER_PTE; i++) - set_pte(pte + i, empty_pte); + pte[i] = empty_pte; return pte; } @@ -119,7 +119,7 @@ static int vmem_add_range(unsigned long start, unsigned long size) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pgd_populate(&init_mm, pg_dir, pm_dir); + pgd_populate_kernel(&init_mm, pg_dir, pm_dir); } pm_dir = pmd_offset(pg_dir, address); @@ -132,7 +132,7 @@ static int vmem_add_range(unsigned long start, unsigned long size) pt_dir = pte_offset_kernel(pm_dir, address); pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL); - set_pte(pt_dir, pte); + *pt_dir = pte; } ret = 0; out: @@ -161,7 +161,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pmd_none(*pm_dir)) continue; pt_dir = pte_offset_kernel(pm_dir, address); - set_pte(pt_dir, pte); + *pt_dir = pte; } flush_tlb_kernel_range(start, start + size); } @@ -191,7 +191,7 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pgd_populate(&init_mm, pg_dir, pm_dir); + pgd_populate_kernel(&init_mm, pg_dir, pm_dir); } pm_dir = pmd_offset(pg_dir, address); @@ -210,7 +210,7 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size) if (!new_page) goto out; pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); - set_pte(pt_dir, pte); + *pt_dir = pte; } } ret = 0; |