summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig5
-rw-r--r--arch/s390/include/asm/Kbuild2
-rw-r--r--arch/s390/include/asm/hugetlb.h19
-rw-r--r--arch/s390/include/asm/pgtable.h210
-rw-r--r--arch/s390/include/asm/processor.h4
-rw-r--r--arch/s390/include/asm/setup.h5
-rw-r--r--arch/s390/include/asm/tlb.h1
-rw-r--r--arch/s390/include/asm/unistd.h2
-rw-r--r--arch/s390/kernel/compat_linux.c26
-rw-r--r--arch/s390/kernel/compat_linux.h2
-rw-r--r--arch/s390/kernel/compat_wrapper.S2
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/kernel/entry.S51
-rw-r--r--arch/s390/kernel/entry.h3
-rw-r--r--arch/s390/kernel/entry64.S50
-rw-r--r--arch/s390/kernel/process.c107
-rw-r--r--arch/s390/mm/fault.c1
-rw-r--r--arch/s390/mm/gup.c11
-rw-r--r--arch/s390/mm/pgtable.c108
19 files changed, 424 insertions, 187 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f9acddd9ace..99d2d790d15 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -68,6 +68,7 @@ config S390
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_C_RECORDMCOUNT
select HAVE_SYSCALL_TRACEPOINTS
+ select SYSCTL_EXCEPTION_TRACE
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_REGS_AND_STACK_ACCESS_API
@@ -80,6 +81,7 @@ config S390
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
@@ -126,12 +128,14 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select HAVE_UID16 if 32BIT
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GENERIC_CLOCKEVENTS
select KTIME_SCALAR if 32BIT
select HAVE_ARCH_SECCOMP_FILTER
+ select GENERIC_KERNEL_THREAD
config SCHED_OMIT_FRAME_POINTER
def_bool y
@@ -656,7 +660,6 @@ config S390_GUEST
depends on 64BIT && EXPERIMENTAL
select VIRTUALIZATION
select VIRTIO
- select VIRTIO_RING
select VIRTIO_CONSOLE
help
Enabling this option adds support for virtio based paravirtual device
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 287d7bbb6d3..f18fc796bee 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -13,3 +13,5 @@ header-y += tape390.h
header-y += ucontext.h
header-y += vtoc.h
header-y += zcrypt.h
+
+generic-y += clkdev.h
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 2d6e6e38056..593753ee07f 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -33,6 +33,7 @@ static inline int prepare_hugepage_range(struct file *file,
}
#define hugetlb_prefault_arch_hook(mm) do { } while (0)
+#define arch_clear_hugepage_flags(page) do { } while (0)
int arch_prepare_hugepage(struct page *page);
void arch_release_hugepage(struct page *page);
@@ -77,23 +78,6 @@ static inline void __pmd_csp(pmd_t *pmdp)
" csp %1,%3"
: "=m" (*pmdp)
: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
- pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
-{
- unsigned long sto = (unsigned long) pmdp -
- pmd_index(address) * sizeof(pmd_t);
-
- if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
- asm volatile(
- " .insn rrf,0xb98e0000,%2,%3,0,0"
- : "=m" (*pmdp)
- : "m" (*pmdp), "a" (sto),
- "a" ((address & HPAGE_MASK))
- );
- }
- pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
}
static inline void huge_ptep_invalidate(struct mm_struct *mm,
@@ -105,6 +89,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm,
__pmd_idte(address, pmdp);
else
__pmd_csp(pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
}
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6bd7d748301..979fe3dc078 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,6 +42,7 @@ extern void fault_init(void);
* tables contain all the necessary information.
*/
#define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
/*
* ZERO_PAGE is a global shared page that is always zero; used
@@ -347,6 +348,12 @@ extern struct page *vmemmap;
#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
+#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
+#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT)
+
+/* Set of bits not changed in pmd_modify */
+#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
+ | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
/* Page status table bits for virtualization */
#define RCP_ACC_BITS 0xf000000000000000UL
@@ -506,6 +513,30 @@ static inline int pmd_bad(pmd_t pmd)
return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
}
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ return 0;
+}
+
static inline int pte_none(pte_t pte)
{
return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
@@ -1159,6 +1190,185 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
+static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
+{
+ unsigned long sto = (unsigned long) pmdp -
+ pmd_index(address) * sizeof(pmd_t);
+
+ if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) {
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,0"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto),
+ "a" ((address & HPAGE_MASK))
+ : "cc"
+ );
+ }
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t entry)
+{
+ *pmdp = entry;
+}
+
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+ unsigned long pgprot_pmd = 0;
+
+ if (pgprot_val(pgprot) & _PAGE_INVALID) {
+ if (pgprot_val(pgprot) & _PAGE_SWT)
+ pgprot_pmd |= _HPAGE_TYPE_NONE;
+ pgprot_pmd |= _SEGMENT_ENTRY_INV;
+ }
+ if (pgprot_val(pgprot) & _PAGE_RO)
+ pgprot_pmd |= _SEGMENT_ENTRY_RO;
+ return pgprot_pmd;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ return pmd;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
+ return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_RO;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ /* No dirty bit in the segment table entry. */
+ return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ /* No referenced bit in the segment table entry. */
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ /* No referenced bit in the segment table entry. */
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK;
+ long tmp, rc;
+ int counter;
+
+ rc = 0;
+ if (MACHINE_HAS_RRBM) {
+ counter = PTRS_PER_PTE >> 6;
+ asm volatile(
+ "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
+ " ogr %1,%0\n"
+ " la %3,0(%4,%3)\n"
+ " brct %2,0b\n"
+ : "=&d" (tmp), "+&d" (rc), "+d" (counter),
+ "+a" (pmd_addr)
+ : "a" (64 * 4096UL) : "cc");
+ rc = !!rc;
+ } else {
+ counter = PTRS_PER_PTE;
+ asm volatile(
+ "0: rrbe 0,%2\n"
+ " la %2,0(%3,%2)\n"
+ " brc 12,1f\n"
+ " lhi %0,1\n"
+ "1: brct %1,0b\n"
+ : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
+ : "a" (4096UL) : "cc");
+ }
+ return rc;
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
+
+ __pmd_idte(address, pmdp);
+ pmd_clear(pmdp);
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline void pmdp_invalidate(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ __pmd_idte(address, pmdp);
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+ pmd_t __pmd;
+ pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+ return __pmd;
+}
+
+#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+}
+
+static inline int has_transparent_hugepage(void)
+{
+ return MACHINE_HAS_HPAGE ? 1 : 0;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ if (pmd_trans_huge(pmd))
+ return pmd_val(pmd) >> HPAGE_SHIFT;
+ else
+ return pmd_val(pmd) >> PAGE_SHIFT;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* 31 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 56831dfa919..94e749c9023 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -35,6 +35,7 @@ static inline void get_cpu_id(struct cpuid *ptr)
extern void s390_adjust_jiffies(void);
extern const struct seq_operations cpuinfo_op;
extern int sysctl_ieee_emulation_warnings;
+extern void execve_tail(void);
/*
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
@@ -126,6 +127,7 @@ struct stack_frame {
regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \
regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
regs->gprs[15] = new_stackp; \
+ execve_tail(); \
} while (0)
#define start_thread31(regs, new_psw, new_stackp) do { \
@@ -135,6 +137,7 @@ struct stack_frame {
__tlb_flush_mm(current->mm); \
crst_table_downgrade(current->mm, 1UL << 31); \
update_mm(current->mm, current); \
+ execve_tail(); \
} while (0)
/* Forward declaration, a strange C thing */
@@ -150,7 +153,6 @@ static inline void show_cacheinfo(struct seq_file *m) { }
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
/*
* Return saved PC of a blocked thread.
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 87b47ca954f..8cfd731a18d 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -81,6 +81,7 @@ extern unsigned int s390_user_mode;
#define MACHINE_FLAG_SPP (1UL << 13)
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
#define MACHINE_FLAG_TE (1UL << 15)
+#define MACHINE_FLAG_RRBM (1UL << 16)
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -99,7 +100,8 @@ extern unsigned int s390_user_mode;
#define MACHINE_HAS_PFMF (0)
#define MACHINE_HAS_SPP (0)
#define MACHINE_HAS_TOPOLOGY (0)
-#define MACHINE_HAS_TE (0)
+#define MACHINE_HAS_TE (0)
+#define MACHINE_HAS_RRBM (0)
#else /* CONFIG_64BIT */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
@@ -112,6 +114,7 @@ extern unsigned int s390_user_mode;
#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM)
#endif /* CONFIG_64BIT */
#define ZFCPDUMP_HSA_SIZE (32UL<<20)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 06e5acbc84b..b75d7d68668 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -137,6 +137,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
#define tlb_migrate_finish(mm) do { } while (0)
#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 4e64b5cd155..8192e292753 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -417,6 +417,8 @@
# define __ARCH_WANT_COMPAT_SYS_TIME
# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
# endif
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_KERNEL_EXECVE
/*
* "Conditional" syscalls
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 189963c90c6..65cca95843e 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -432,32 +432,6 @@ sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo)
return ret;
}
-/*
- * sys32_execve() executes a new program after the asm stub has set
- * things up for us. This should basically do what I want it to.
- */
-asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp)
-{
- struct pt_regs *regs = task_pt_regs(current);
- char *filename;
- long rc;
-
- filename = getname(name);
- rc = PTR_ERR(filename);
- if (IS_ERR(filename))
- return rc;
- rc = compat_do_execve(filename, argv, envp, regs);
- if (rc)
- goto out;
- current->thread.fp_regs.fpc=0;
- asm volatile("sfpc %0,0" : : "d" (0));
- rc = regs->gprs[2];
-out:
- putname(filename);
- return rc;
-}
-
asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
size_t count, u32 poshi, u32 poslo)
{
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 90887bd98cf..d4d0239970a 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -125,8 +125,6 @@ long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
compat_sigset_t __user *oset, size_t sigsetsize);
long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
-long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp);
long sys32_init_module(void __user *umod, unsigned long len,
const char __user *uargs);
long sys32_delete_module(const char __user *name_user, unsigned int flags);
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 3afba804fe9..ad79b846535 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1576,7 +1576,7 @@ ENTRY(sys32_execve_wrapper)
llgtr %r2,%r2 # char *
llgtr %r3,%r3 # compat_uptr_t *
llgtr %r4,%r4 # compat_uptr_t *
- jg sys32_execve # branch to system call
+ jg compat_sys_execve # branch to system call
ENTRY(sys_fanotify_init_wrapper)
llgfr %r2,%r2 # unsigned int
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 7f4717675c1..00d11444506 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -388,6 +388,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_SPP;
if (test_facility(50) && test_facility(73))
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ if (test_facility(66))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
#endif
}
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 870bad6d56f..ef46f66bc0d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -331,45 +331,38 @@ ENTRY(ret_from_fork)
l %r12,__LC_THREAD_INFO
l %r13,__LC_SVC_NEW_PSW+4
tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
- jo 0f
- st %r15,__PT_R15(%r11) # store stack pointer for new kthread
-0: l %r1,BASED(.Lschedule_tail)
+ je 1f
+ l %r1,BASED(.Lschedule_tail)
basr %r14,%r1 # call schedule_tail
TRACE_IRQS_ON
ssm __LC_SVC_NEW_PSW # reenable interrupts
j sysc_tracenogo
+1: # it's a kernel thread
+ st %r15,__PT_R15(%r11) # store stack pointer for new kthread
+ l %r1,BASED(.Lschedule_tail)
+ basr %r14,%r1 # call schedule_tail
+ TRACE_IRQS_ON
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lm %r9,%r11,__PT_R9(%r11) # load gprs
+ENTRY(kernel_thread_starter)
+ la %r2,0(%r10)
+ basr %r14,%r9
+ la %r2,0
+ br %r11 # do_exit
+
#
# kernel_execve function needs to deal with pt_regs that is not
# at the usual place
#
-ENTRY(kernel_execve)
- stm %r12,%r15,48(%r15)
- lr %r14,%r15
- l %r13,__LC_SVC_NEW_PSW+4
- ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- st %r14,__SF_BACKCHAIN(%r15)
- la %r12,STACK_FRAME_OVERHEAD(%r15)
- xc 0(__PT_SIZE,%r12),0(%r12)
- l %r1,BASED(.Ldo_execve)
- lr %r5,%r12
- basr %r14,%r1 # call do_execve
- ltr %r2,%r2
- je 0f
- ahi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE)
- lm %r12,%r15,48(%r15)
- br %r14
- # execve succeeded.
-0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- l %r15,__LC_KERNEL_STACK # load ksp
- ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs
- l %r12,__LC_THREAD_INFO
+ENTRY(ret_from_kernel_execve)
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ lr %r15,%r2
+ lr %r11,%r2
+ ahi %r15,-STACK_FRAME_OVERHEAD
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+ l %r12,__LC_THREAD_INFO
ssm __LC_SVC_NEW_PSW # reenable interrupts
- l %r1,BASED(.Lexecve_tail)
- basr %r14,%r1 # call execve_tail
j sysc_return
/*
@@ -931,8 +924,6 @@ cleanup_idle_wait:
.Ldo_signal: .long do_signal
.Ldo_notify_resume: .long do_notify_resume
.Ldo_per_trap: .long do_per_trap
-.Ldo_execve: .long do_execve
-.Lexecve_tail: .long execve_tail
.Ljump_table: .long pgm_check_table
.Lschedule: .long schedule
#ifdef CONFIG_PREEMPT
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index a5f4dc42a5d..d0d3f69a734 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -58,9 +58,6 @@ long sys_fork(void);
long sys_clone(unsigned long newsp, unsigned long clone_flags,
int __user *parent_tidptr, int __user *child_tidptr);
long sys_vfork(void);
-void execve_tail(void);
-long sys_execve(const char __user *name, const char __user *const __user *argv,
- const char __user *const __user *envp);
long sys_sigsuspend(int history0, int history1, old_sigset_t mask);
long sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 7549985402f..f9761f806c9 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -353,41 +353,31 @@ ENTRY(ret_from_fork)
la %r11,STACK_FRAME_OVERHEAD(%r15)
lg %r12,__LC_THREAD_INFO
tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
- jo 0f
- stg %r15,__PT_R15(%r11) # store stack pointer for new kthread
-0: brasl %r14,schedule_tail
+ je 1f
+ brasl %r14,schedule_tail
TRACE_IRQS_ON
ssm __LC_SVC_NEW_PSW # reenable interrupts
j sysc_tracenogo
-
-#
-# kernel_execve function needs to deal with pt_regs that is not
-# at the usual place
-#
-ENTRY(kernel_execve)
- stmg %r12,%r15,96(%r15)
- lgr %r14,%r15
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- stg %r14,__SF_BACKCHAIN(%r15)
- la %r12,STACK_FRAME_OVERHEAD(%r15)
- xc 0(__PT_SIZE,%r12),0(%r12)
- lgr %r5,%r12
- brasl %r14,do_execve
- ltgfr %r2,%r2
- je 0f
- aghi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE)
- lmg %r12,%r15,96(%r15)
- br %r14
- # execve succeeded.
-0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- lg %r15,__LC_KERNEL_STACK # load ksp
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs
- lg %r12,__LC_THREAD_INFO
+1: # it's a kernel thread
+ stg %r15,__PT_R15(%r11) # store stack pointer for new kthread
+ brasl %r14,schedule_tail
+ TRACE_IRQS_ON
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lmg %r9,%r11,__PT_R9(%r11) # load gprs
+ENTRY(kernel_thread_starter)
+ la %r2,0(%r10)
+ basr %r14,%r9
+ la %r2,0
+ br %r11 # do_exit
+
+ENTRY(ret_from_kernel_execve)
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ lgr %r15,%r2
+ lgr %r11,%r2
+ aghi %r15,-STACK_FRAME_OVERHEAD
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lg %r12,__LC_THREAD_INFO
ssm __LC_SVC_NEW_PSW # reenable interrupts
- brasl %r14,execve_tail
j sysc_return
/*
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 5024be27df4..cd31ad457a9 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -100,35 +100,6 @@ void cpu_idle(void)
extern void __kprobes kernel_thread_starter(void);
-asm(
- ".section .kprobes.text, \"ax\"\n"
- ".global kernel_thread_starter\n"
- "kernel_thread_starter:\n"
- " la 2,0(10)\n"
- " basr 14,9\n"
- " la 2,0\n"
- " br 11\n"
- ".previous\n");
-
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
- regs.psw.mask = psw_kernel_bits |
- PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
- regs.gprs[9] = (unsigned long) fn;
- regs.gprs[10] = (unsigned long) arg;
- regs.gprs[11] = (unsigned long) do_exit;
- regs.orig_gpr2 = -1;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
- 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
/*
* Free current thread data structures etc..
*/
@@ -146,7 +117,7 @@ void release_thread(struct task_struct *dead_task)
}
int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
struct thread_info *ti;
@@ -158,20 +129,44 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
p->thread.ksp = (unsigned long) frame;
- /* Store access registers to kernel stack of new process. */
- frame->childregs = *regs;
- frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
- frame->childregs.gprs[15] = new_stackp;
- frame->sf.back_chain = 0;
+ /* Save access registers to new thread structure. */
+ save_access_regs(&p->thread.acrs[0]);
+ /* start new process with ar4 pointing to the correct address space */
+ p->thread.mm_segment = get_fs();
+ /* Don't copy debug registers */
+ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+ memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+ clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+ clear_tsk_thread_flag(p, TIF_PER_TRAP);
+ /* Initialize per thread user and system timer values */
+ ti = task_thread_info(p);
+ ti->user_timer = 0;
+ ti->system_timer = 0;
+ frame->sf.back_chain = 0;
/* new return point is ret_from_fork */
frame->sf.gprs[8] = (unsigned long) ret_from_fork;
-
/* fake return stack for resume(), don't go back to schedule */
frame->sf.gprs[9] = (unsigned long) frame;
- /* Save access registers to new thread structure. */
- save_access_regs(&p->thread.acrs[0]);
+ /* Store access registers to kernel stack of new process. */
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(&frame->childregs, 0, sizeof(struct pt_regs));
+ frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.addr = PSW_ADDR_AMODE |
+ (unsigned long) kernel_thread_starter;
+ frame->childregs.gprs[9] = new_stackp; /* function */
+ frame->childregs.gprs[10] = arg;
+ frame->childregs.gprs[11] = (unsigned long) do_exit;
+ frame->childregs.orig_gpr2 = -1;
+
+ return 0;
+ }
+ frame->childregs = *regs;
+ frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
+ frame->childregs.gprs[15] = new_stackp;
/* Don't copy runtime instrumentation info */
p->thread.ri_cb = NULL;
@@ -202,17 +197,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
}
}
#endif /* CONFIG_64BIT */
- /* start new process with ar4 pointing to the correct address space */
- p->thread.mm_segment = get_fs();
- /* Don't copy debug registers */
- memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
- memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
- clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
- clear_tsk_thread_flag(p, TIF_PER_TRAP);
- /* Initialize per thread user and system timer values */
- ti = task_thread_info(p);
- ti->user_timer = 0;
- ti->system_timer = 0;
return 0;
}
@@ -258,31 +242,6 @@ asmlinkage void execve_tail(void)
}
/*
- * sys_execve() executes a new program.
- */
-SYSCALL_DEFINE3(execve, const char __user *, name,
- const char __user *const __user *, argv,
- const char __user *const __user *, envp)
-{
- struct pt_regs *regs = task_pt_regs(current);
- char *filename;
- long rc;
-
- filename = getname(name);
- rc = PTR_ERR(filename);
- if (IS_ERR(filename))
- return rc;
- rc = do_execve(filename, argv, envp, regs);
- if (rc)
- goto out;
- execve_tail();
- rc = regs->gprs[2];
-out:
- putname(filename);
- return rc;
-}
-
-/*
* fill in the FPU structure for a core dump.
*/
int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac9122ca115..04ad4001a28 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -367,6 +367,7 @@ retry:
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem);
goto retry;
}
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index eeaf8023851..60acb93a468 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -115,7 +115,16 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
pmd = *pmdp;
barrier();
next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
+ /*
+ * The pmd_trans_splitting() check below explains why
+ * pmdp_splitting_flush() has to serialize with
+ * smp_call_function() against our disabled IRQs, to stop
+ * this gup-fast code from running while we set the
+ * splitting bit in the pmd. Returning zero will take
+ * the slow path that will call wait_split_huge_page()
+ * if the pmd is still in splitting state.
+ */
+ if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (unlikely(pmd_huge(pmd))) {
if (!gup_huge_pmd(pmdp, pmd, addr, next,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b402991e43d..c8188a18af0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -787,6 +787,30 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
tlb_table_flush(tlb);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void thp_split_vma(struct vm_area_struct *vma)
+{
+ unsigned long addr;
+ struct page *page;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ page = follow_page(vma, addr, FOLL_SPLIT);
+ }
+}
+
+void thp_split_mm(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma = mm->mmap;
+
+ while (vma != NULL) {
+ thp_split_vma(vma);
+ vma->vm_flags &= ~VM_HUGEPAGE;
+ vma->vm_flags |= VM_NOHUGEPAGE;
+ vma = vma->vm_next;
+ }
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
/*
* switch on pgstes for its userspace process (for kvm)
*/
@@ -824,6 +848,12 @@ int s390_enable_sie(void)
if (!mm)
return -ENOMEM;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /* split thp mappings and disable thp for future mappings */
+ thp_split_mm(mm);
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+
/* Now lets check again if something happened */
task_lock(tsk);
if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
@@ -866,3 +896,81 @@ bool kernel_page_present(struct page *page)
return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ /* No need to flush TLB
+ * On s390 reference bits are in storage key and never in TLB */
+ return pmdp_test_and_clear_young(vma, address, pmdp);
+}
+
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+ if (pmd_same(*pmdp, entry))
+ return 0;
+ pmdp_invalidate(vma, address, pmdp);
+ set_pmd_at(vma->vm_mm, address, pmdp, entry);
+ return 1;
+}
+
+static void pmdp_splitting_flush_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
+ (unsigned long *) pmdp)) {
+ /* need to serialize against gup-fast (IRQ disabled) */
+ smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
+ }
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ /* FIFO */
+ if (!mm->pmd_huge_pte)
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+ mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+{
+ struct list_head *lh;
+ pgtable_t pgtable;
+ pte_t *ptep;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ /* FIFO */
+ pgtable = mm->pmd_huge_pte;
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ mm->pmd_huge_pte = NULL;
+ else {
+ mm->pmd_huge_pte = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable;
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ ptep++;
+ pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+ return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */