summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig3
-rw-r--r--arch/s390/configs/default_defconfig1
-rw-r--r--arch/s390/include/asm/atomic.h70
-rw-r--r--arch/s390/include/asm/bitops.h41
-rw-r--r--arch/s390/include/asm/ccwgroup.h1
-rw-r--r--arch/s390/include/asm/cio.h2
-rw-r--r--arch/s390/include/asm/futex.h66
-rw-r--r--arch/s390/include/asm/irq.h19
-rw-r--r--arch/s390/include/asm/kvm_host.h98
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h45
-rw-r--r--arch/s390/include/asm/pgtable.h130
-rw-r--r--arch/s390/include/asm/processor.h1
-rw-r--r--arch/s390/include/asm/setup.h3
-rw-r--r--arch/s390/include/asm/switch_to.h1
-rw-r--r--arch/s390/include/asm/syscall.h7
-rw-r--r--arch/s390/include/asm/thread_info.h2
-rw-r--r--arch/s390/include/asm/tlb.h14
-rw-r--r--arch/s390/include/asm/tlbflush.h115
-rw-r--r--arch/s390/include/asm/uaccess.h2
-rw-r--r--arch/s390/include/uapi/asm/kvm.h43
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/cache.c5
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/kernel/entry.S24
-rw-r--r--arch/s390/kernel/entry64.S24
-rw-r--r--arch/s390/kernel/ftrace.c3
-rw-r--r--arch/s390/kernel/irq.c11
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c6
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c6
-rw-r--r--arch/s390/kernel/runtime_instr.c3
-rw-r--r--arch/s390/kernel/sclp.S5
-rw-r--r--arch/s390/kernel/smp.c23
-rw-r--r--arch/s390/kernel/time.c6
-rw-r--r--arch/s390/kvm/Kconfig4
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c88
-rw-r--r--arch/s390/kvm/interrupt.c704
-rw-r--r--arch/s390/kvm/irq.h22
-rw-r--r--arch/s390/kvm/kvm-s390.c212
-rw-r--r--arch/s390/kvm/kvm-s390.h7
-rw-r--r--arch/s390/kvm/priv.c7
-rw-r--r--arch/s390/kvm/sigp.c157
-rw-r--r--arch/s390/kvm/trace.h46
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/uaccess.c407
-rw-r--r--arch/s390/lib/uaccess.h16
-rw-r--r--arch/s390/lib/uaccess_mvcos.c263
-rw-r--r--arch/s390/lib/uaccess_pt.c471
-rw-r--r--arch/s390/mm/fault.c79
-rw-r--r--arch/s390/mm/hugetlbpage.c5
-rw-r--r--arch/s390/mm/init.c7
-rw-r--r--arch/s390/mm/pgtable.c15
-rw-r--r--arch/s390/mm/vmem.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c13
-rw-r--r--arch/s390/oprofile/hwsampler.c4
-rw-r--r--arch/s390/pci/pci.c16
-rw-r--r--arch/s390/pci/pci_sysfs.c18
58 files changed, 2137 insertions, 1215 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 953f17c8d17..d68fe34799b 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -52,7 +52,7 @@ config KEXEC
config AUDIT_ARCH
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config PCI_QUIRKS
@@ -103,6 +103,7 @@ config S390
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index ddaae2f5c91..8df022c43af 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -581,7 +581,6 @@ CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
-CONFIG_DEBUG_WRITECOUNT=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index fa9aaf7144b..1d4706114a4 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,23 +15,29 @@
#include <linux/compiler.h>
#include <linux/types.h>
+#include <asm/barrier.h>
#include <asm/cmpxchg.h>
#define ATOMIC_INIT(i) { (i) }
+#define __ATOMIC_NO_BARRIER "\n"
+
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __ATOMIC_OR "lao"
#define __ATOMIC_AND "lan"
#define __ATOMIC_ADD "laa"
+#define __ATOMIC_BARRIER "bcr 14,0\n"
-#define __ATOMIC_LOOP(ptr, op_val, op_string) \
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
({ \
int old_val; \
\
typecheck(atomic_t *, ptr); \
asm volatile( \
+ __barrier \
op_string " %0,%2,%1\n" \
+ __barrier \
: "=d" (old_val), "+Q" ((ptr)->counter) \
: "d" (op_val) \
: "cc", "memory"); \
@@ -43,8 +49,9 @@
#define __ATOMIC_OR "or"
#define __ATOMIC_AND "nr"
#define __ATOMIC_ADD "ar"
+#define __ATOMIC_BARRIER "\n"
-#define __ATOMIC_LOOP(ptr, op_val, op_string) \
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
({ \
int old_val, new_val; \
\
@@ -82,7 +89,7 @@ static inline void atomic_set(atomic_t *v, int i)
static inline int atomic_add_return(int i, atomic_t *v)
{
- return __ATOMIC_LOOP(v, i, __ATOMIC_ADD) + i;
+ return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
}
static inline void atomic_add(int i, atomic_t *v)
@@ -94,12 +101,10 @@ static inline void atomic_add(int i, atomic_t *v)
: "+Q" (v->counter)
: "i" (i)
: "cc", "memory");
- } else {
- atomic_add_return(i, v);
+ return;
}
-#else
- atomic_add_return(i, v);
#endif
+ __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER);
}
#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
@@ -115,12 +120,12 @@ static inline void atomic_add(int i, atomic_t *v)
static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
{
- __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND);
+ __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER);
}
static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
{
- __ATOMIC_LOOP(v, mask, __ATOMIC_OR);
+ __ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER);
}
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
@@ -157,19 +162,24 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#ifdef CONFIG_64BIT
+#define __ATOMIC64_NO_BARRIER "\n"
+
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __ATOMIC64_OR "laog"
#define __ATOMIC64_AND "lang"
#define __ATOMIC64_ADD "laag"
+#define __ATOMIC64_BARRIER "bcr 14,0\n"
-#define __ATOMIC64_LOOP(ptr, op_val, op_string) \
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
({ \
long long old_val; \
\
typecheck(atomic64_t *, ptr); \
asm volatile( \
+ __barrier \
op_string " %0,%2,%1\n" \
+ __barrier \
: "=d" (old_val), "+Q" ((ptr)->counter) \
: "d" (op_val) \
: "cc", "memory"); \
@@ -181,8 +191,9 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define __ATOMIC64_OR "ogr"
#define __ATOMIC64_AND "ngr"
#define __ATOMIC64_ADD "agr"
+#define __ATOMIC64_BARRIER "\n"
-#define __ATOMIC64_LOOP(ptr, op_val, op_string) \
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
({ \
long long old_val, new_val; \
\
@@ -220,17 +231,32 @@ static inline void atomic64_set(atomic64_t *v, long long i)
static inline long long atomic64_add_return(long long i, atomic64_t *v)
{
- return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD) + i;
+ return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
+}
+
+static inline void atomic64_add(long long i, atomic64_t *v)
+{
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ asm volatile(
+ "agsi %0,%1\n"
+ : "+Q" (v->counter)
+ : "i" (i)
+ : "cc", "memory");
+ return;
+ }
+#endif
+ __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
}
static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
{
- __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND);
+ __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER);
}
static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
{
- __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR);
+ __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER);
}
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
@@ -334,25 +360,13 @@ static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v)
} while (atomic64_cmpxchg(v, old, new) != old);
}
-#endif /* CONFIG_64BIT */
-
static inline void atomic64_add(long long i, atomic64_t *v)
{
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
- asm volatile(
- "agsi %0,%1\n"
- : "+Q" (v->counter)
- : "i" (i)
- : "cc", "memory");
- } else {
- atomic64_add_return(i, v);
- }
-#else
atomic64_add_return(i, v);
-#endif
}
+#endif /* CONFIG_64BIT */
+
static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
{
long long c, old;
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index ec5ef891db6..52054247767 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -47,14 +47,18 @@
#include <linux/typecheck.h>
#include <linux/compiler.h>
+#include <asm/barrier.h>
+
+#define __BITOPS_NO_BARRIER "\n"
#ifndef CONFIG_64BIT
#define __BITOPS_OR "or"
#define __BITOPS_AND "nr"
#define __BITOPS_XOR "xr"
+#define __BITOPS_BARRIER "\n"
-#define __BITOPS_LOOP(__addr, __val, __op_string) \
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
({ \
unsigned long __old, __new; \
\
@@ -67,7 +71,7 @@
" jl 0b" \
: "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
: "d" (__val) \
- : "cc"); \
+ : "cc", "memory"); \
__old; \
})
@@ -78,17 +82,20 @@
#define __BITOPS_OR "laog"
#define __BITOPS_AND "lang"
#define __BITOPS_XOR "laxg"
+#define __BITOPS_BARRIER "bcr 14,0\n"
-#define __BITOPS_LOOP(__addr, __val, __op_string) \
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
({ \
unsigned long __old; \
\
typecheck(unsigned long *, (__addr)); \
asm volatile( \
+ __barrier \
__op_string " %0,%2,%1\n" \
+ __barrier \
: "=d" (__old), "+Q" (*(__addr)) \
: "d" (__val) \
- : "cc"); \
+ : "cc", "memory"); \
__old; \
})
@@ -97,8 +104,9 @@
#define __BITOPS_OR "ogr"
#define __BITOPS_AND "ngr"
#define __BITOPS_XOR "xgr"
+#define __BITOPS_BARRIER "\n"
-#define __BITOPS_LOOP(__addr, __val, __op_string) \
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
({ \
unsigned long __old, __new; \
\
@@ -111,7 +119,7 @@
" jl 0b" \
: "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
: "d" (__val) \
- : "cc"); \
+ : "cc", "memory"); \
__old; \
})
@@ -149,12 +157,12 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
"oi %0,%b1\n"
: "+Q" (*caddr)
: "i" (1 << (nr & 7))
- : "cc");
+ : "cc", "memory");
return;
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __BITOPS_LOOP(addr, mask, __BITOPS_OR);
+ __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER);
}
static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -170,12 +178,12 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
"ni %0,%b1\n"
: "+Q" (*caddr)
: "i" (~(1 << (nr & 7)))
- : "cc");
+ : "cc", "memory");
return;
}
#endif
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- __BITOPS_LOOP(addr, mask, __BITOPS_AND);
+ __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER);
}
static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -191,12 +199,12 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
"xi %0,%b1\n"
: "+Q" (*caddr)
: "i" (1 << (nr & 7))
- : "cc");
+ : "cc", "memory");
return;
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __BITOPS_LOOP(addr, mask, __BITOPS_XOR);
+ __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER);
}
static inline int
@@ -206,8 +214,7 @@ test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __BITOPS_LOOP(addr, mask, __BITOPS_OR);
- barrier();
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_BARRIER);
return (old & mask) != 0;
}
@@ -218,8 +225,7 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- old = __BITOPS_LOOP(addr, mask, __BITOPS_AND);
- barrier();
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_BARRIER);
return (old & ~mask) != 0;
}
@@ -230,8 +236,7 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR);
- barrier();
+ old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_BARRIER);
return (old & mask) != 0;
}
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 23723ce5ca7..6e670f88d12 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -23,6 +23,7 @@ struct ccwgroup_device {
unsigned int count;
struct device dev;
struct ccw_device *cdev[0];
+ struct work_struct ungroup_work;
};
/**
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index d42625053c3..09633920776 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -199,7 +199,7 @@ struct esw_eadm {
/**
* struct irb - interruption response block
* @scsw: subchannel status word
- * @esw: extened status word
+ * @esw: extended status word
* @ecw: extended control word
*
* The irb that is handed to the device driver when an interrupt occurs. For
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index fda46bd38c9..69cf5b5eddc 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -1,12 +1,25 @@
#ifndef _ASM_S390_FUTEX_H
#define _ASM_S390_FUTEX_H
-#include <linux/futex.h>
#include <linux/uaccess.h>
+#include <linux/futex.h>
+#include <asm/mmu_context.h>
#include <asm/errno.h>
-int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval);
-int __futex_atomic_op_inuser(int op, u32 __user *uaddr, int oparg, int *old);
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
+ asm volatile( \
+ " sacf 256\n" \
+ "0: l %1,0(%6)\n" \
+ "1:"insn \
+ "2: cs %1,%2,0(%6)\n" \
+ "3: jl 1b\n" \
+ " lhi %0,0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
+ "=m" (*uaddr) \
+ : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
+ "m" (*uaddr) : "cc");
static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
@@ -14,13 +27,37 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
- int oldval, ret;
+ int oldval = 0, newval, ret;
+ update_primary_asce(current);
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
pagefault_disable();
- ret = __futex_atomic_op_inuser(op, uaddr, oparg, &oldval);
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op("lr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("lr %2,%1\nar %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("lr %2,%1\nor %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+ ret, oldval, newval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
pagefault_enable();
if (!ret) {
@@ -37,4 +74,23 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
return ret;
}
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ int ret;
+
+ update_primary_asce(current);
+ asm volatile(
+ " sacf 256\n"
+ "0: cs %1,%4,0(%5)\n"
+ "1: la %0,0\n"
+ "2: sacf 768\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+ : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+ : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ : "cc", "memory");
+ *uval = oldval;
+ return ret;
+}
+
#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 5f8bcc5fe42..c4dd400a279 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -16,6 +16,20 @@
/* This number is used when no interrupt has been assigned */
#define NO_IRQ 0
+/* External interruption codes */
+#define EXT_IRQ_INTERRUPT_KEY 0x0040
+#define EXT_IRQ_CLK_COMP 0x1004
+#define EXT_IRQ_CPU_TIMER 0x1005
+#define EXT_IRQ_WARNING_TRACK 0x1007
+#define EXT_IRQ_MALFUNC_ALERT 0x1200
+#define EXT_IRQ_EMERGENCY_SIG 0x1201
+#define EXT_IRQ_EXTERNAL_CALL 0x1202
+#define EXT_IRQ_TIMING_ALERT 0x1406
+#define EXT_IRQ_MEASURE_ALERT 0x1407
+#define EXT_IRQ_SERVICE_SIG 0x2401
+#define EXT_IRQ_CP_SERVICE 0x2603
+#define EXT_IRQ_IUCV 0x4000
+
#ifndef __ASSEMBLY__
#include <linux/hardirq.h>
@@ -53,6 +67,7 @@ enum interruption_class {
IRQIO_PCI,
IRQIO_MSI,
IRQIO_VIR,
+ IRQIO_VAI,
NMI_NMI,
CPU_RST,
NR_ARCH_IRQS
@@ -76,8 +91,8 @@ struct ext_code {
typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
-int register_external_interrupt(u16 code, ext_int_handler_t handler);
-int unregister_external_interrupt(u16 code, ext_int_handler_t handler);
+int register_external_irq(u16 code, ext_int_handler_t handler);
+int unregister_external_irq(u16 code, ext_int_handler_t handler);
enum irq_subclass {
IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9bf95bb30f1..154b60089be 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -16,12 +16,22 @@
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
+#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
+#include <asm/isc.h>
#define KVM_MAX_VCPUS 64
#define KVM_USER_MEM_SLOTS 32
+/*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 4096
+
struct sca_entry {
atomic_t scn;
__u32 reserved;
@@ -108,7 +118,9 @@ struct kvm_s390_sie_block {
__u32 fac; /* 0x01a0 */
__u8 reserved1a4[20]; /* 0x01a4 */
__u64 cbrlo; /* 0x01b8 */
- __u8 reserved1c0[40]; /* 0x01c0 */
+ __u8 reserved1c0[30]; /* 0x01c0 */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
__u64 itdba; /* 0x01e8 */
__u8 reserved1f0[16]; /* 0x01f0 */
} __attribute__((packed));
@@ -171,18 +183,6 @@ struct kvm_vcpu_stat {
u32 diagnose_9c;
};
-struct kvm_s390_io_info {
- __u16 subchannel_id; /* 0x0b8 */
- __u16 subchannel_nr; /* 0x0ba */
- __u32 io_int_parm; /* 0x0bc */
- __u32 io_int_word; /* 0x0c0 */
-};
-
-struct kvm_s390_ext_info {
- __u32 ext_params;
- __u64 ext_params2;
-};
-
#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
@@ -191,27 +191,6 @@ struct kvm_s390_ext_info {
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
-struct kvm_s390_pgm_info {
- __u16 code;
-};
-
-struct kvm_s390_prefix_info {
- __u32 address;
-};
-
-struct kvm_s390_extcall_info {
- __u16 code;
-};
-
-struct kvm_s390_emerg_info {
- __u16 code;
-};
-
-struct kvm_s390_mchk_info {
- __u64 cr14;
- __u64 mcic;
-};
-
struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
@@ -246,9 +225,8 @@ struct kvm_s390_float_interrupt {
struct list_head list;
atomic_t active;
int next_rr_cpu;
- unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
- / sizeof(long)];
- struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
+ unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int irq_count;
};
@@ -265,6 +243,10 @@ struct kvm_vcpu_arch {
u64 stidp_data;
};
struct gmap *gmap;
+#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
+ unsigned long pfault_token;
+ unsigned long pfault_select;
+ unsigned long pfault_compare;
};
struct kvm_vm_stat {
@@ -274,12 +256,36 @@ struct kvm_vm_stat {
struct kvm_arch_memory_slot {
};
+struct s390_map_info {
+ struct list_head list;
+ __u64 guest_addr;
+ __u64 addr;
+ struct page *page;
+};
+
+struct s390_io_adapter {
+ unsigned int id;
+ int isc;
+ bool maskable;
+ bool masked;
+ bool swap;
+ struct rw_semaphore maps_lock;
+ struct list_head maps;
+ atomic_t nr_maps;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
+ struct kvm_device *flic;
struct gmap *gmap;
int css_support;
+ int use_irqchip;
+ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -290,6 +296,24 @@ static inline bool kvm_is_error_hva(unsigned long addr)
return IS_ERR_VALUE(addr);
}
+#define ASYNC_PF_PER_VCPU 64
+struct kvm_vcpu;
+struct kvm_async_pf;
+struct kvm_arch_async_pf {
+ unsigned long pfault_token;
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
#endif
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index ff132ac64dd..f77695a82f6 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -1,9 +1,11 @@
#ifndef __MMU_H
#define __MMU_H
+#include <linux/cpumask.h>
#include <linux/errno.h>
typedef struct {
+ cpumask_t cpu_attach_mask;
atomic_t attach_count;
unsigned int flush_mm;
spinlock_t list_lock;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 38149b63dc4..71be346d0e3 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,6 +15,7 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.attach_count, 0);
mm->context.flush_mm = 0;
mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
@@ -29,41 +30,61 @@ static inline int init_new_context(struct task_struct *tsk,
#define destroy_context(mm) do { } while (0)
-#ifndef CONFIG_64BIT
-#define LCTL_OPCODE "lctl"
-#else
-#define LCTL_OPCODE "lctlg"
-#endif
-
-static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
+static inline void update_user_asce(struct mm_struct *mm, int load_primary)
{
pgd_t *pgd = mm->pgd;
S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
- /* Load primary space page table origin. */
- asm volatile(LCTL_OPCODE" 1,1,%0\n" : : "m" (S390_lowcore.user_asce));
+ if (load_primary)
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
set_fs(current->thread.mm_segment);
}
+static inline void clear_user_asce(struct mm_struct *mm, int load_primary)
+{
+ S390_lowcore.user_asce = S390_lowcore.kernel_asce;
+
+ if (load_primary)
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+}
+
+static inline void update_primary_asce(struct task_struct *tsk)
+{
+ unsigned long asce;
+
+ __ctl_store(asce, 1, 1);
+ if (asce != S390_lowcore.kernel_asce)
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ set_tsk_thread_flag(tsk, TIF_ASCE);
+}
+
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
int cpu = smp_processor_id();
+ update_primary_asce(tsk);
if (prev == next)
return;
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
if (atomic_inc_return(&next->context.attach_count) >> 16) {
- /* Delay update_mm until all TLB flushes are done. */
+ /* Delay update_user_asce until all TLB flushes are done. */
set_tsk_thread_flag(tsk, TIF_TLB_WAIT);
+ /* Clear old ASCE by loading the kernel ASCE. */
+ clear_user_asce(next, 0);
} else {
cpumask_set_cpu(cpu, mm_cpumask(next));
- update_mm(next, tsk);
+ update_user_asce(next, 0);
if (next->context.flush_mm)
/* Flush pending TLBs */
__tlb_flush_mm(next);
}
atomic_dec(&prev->context.attach_count);
WARN_ON(atomic_read(&prev->context.attach_count) < 0);
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}
#define finish_arch_post_lock_switch finish_arch_post_lock_switch
@@ -80,7 +101,7 @@ static inline void finish_arch_post_lock_switch(void)
cpu_relax();
cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- update_mm(mm, tsk);
+ update_user_asce(mm, 0);
if (mm->context.flush_mm)
__tlb_flush_mm(mm);
preempt_enable();
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1ab75eaacbd..12f75313e08 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -782,6 +782,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @crst_list: list of all crst tables used in the guest address space
+ * @pfault_enabled: defines if pfaults are applicable for the guest
*/
struct gmap {
struct list_head list;
@@ -790,6 +791,7 @@ struct gmap {
unsigned long asce;
void *private;
struct list_head crst_list;
+ bool pfault_enabled;
};
/**
@@ -1068,12 +1070,35 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
}
+static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+{
+ unsigned long pto = (unsigned long) ptep;
+
+#ifndef CONFIG_64BIT
+ /* pto in ESA mode must point to the start of the segment table */
+ pto &= 0x7ffffc00;
+#endif
+ /* Invalidation + local TLB flush for the pte */
+ asm volatile(
+ " .insn rrf,0xb2210000,%2,%3,0,1"
+ : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
static inline void ptep_flush_direct(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
+ int active, count;
+
if (pte_val(*ptep) & _PAGE_INVALID)
return;
- __ptep_ipte(address, ptep);
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_ipte_local(address, ptep);
+ else
+ __ptep_ipte(address, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
}
static inline void ptep_flush_lazy(struct mm_struct *mm,
@@ -1382,35 +1407,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
-static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
-{
- unsigned long sto = (unsigned long) pmdp -
- pmd_index(address) * sizeof(pmd_t);
-
- if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
- asm volatile(
- " .insn rrf,0xb98e0000,%2,%3,0,0"
- : "=m" (*pmdp)
- : "m" (*pmdp), "a" (sto),
- "a" ((address & HPAGE_MASK))
- : "cc"
- );
- }
-}
-
-static inline void __pmd_csp(pmd_t *pmdp)
-{
- register unsigned long reg2 asm("2") = pmd_val(*pmdp);
- register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
- _SEGMENT_ENTRY_INVALID;
- register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-
- asm volatile(
- " csp %1,%3"
- : "=m" (*pmdp)
- : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
-}
-
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
{
@@ -1479,18 +1475,80 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
+static inline void __pmdp_csp(pmd_t *pmdp)
+{
+ register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+ register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+ _SEGMENT_ENTRY_INVALID;
+ register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+
+ asm volatile(
+ " csp %1,%3"
+ : "=m" (*pmdp)
+ : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+}
+
+static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
+{
+ unsigned long sto;
+
+ sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,0"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+ : "cc" );
+}
+
+static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
+{
+ unsigned long sto;
+
+ sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+ asm volatile(
+ " .insn rrf,0xb98e0000,%2,%3,0,1"
+ : "=m" (*pmdp)
+ : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+ : "cc" );
+}
+
+static inline void pmdp_flush_direct(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ int active, count;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return;
+ if (!MACHINE_HAS_IDTE) {
+ __pmdp_csp(pmdp);
+ return;
+ }
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __pmdp_idte_local(address, pmdp);
+ else
+ __pmdp_idte(address, pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+}
+
static inline void pmdp_flush_lazy(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
int active, count;
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return;
active = (mm == current->active_mm) ? 1 : 0;
count = atomic_add_return(0x10000, &mm->context.attach_count);
if ((count & 0xffff) <= active) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
- } else
- __pmd_idte(address, pmdp);
+ } else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(address, pmdp);
+ else
+ __pmdp_csp(pmdp);
atomic_sub(0x10000, &mm->context.attach_count);
}
@@ -1543,7 +1601,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
pmd_t pmd;
pmd = *pmdp;
- __pmd_idte(address, pmdp);
+ pmdp_flush_direct(vma->vm_mm, address, pmdp);
*pmdp = pmd_mkold(pmd);
return pmd_young(pmd);
}
@@ -1554,7 +1612,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
{
pmd_t pmd = *pmdp;
- __pmd_idte(address, pmdp);
+ pmdp_flush_direct(mm, address, pmdp);
pmd_clear(pmdp);
return pmd;
}
@@ -1570,7 +1628,7 @@ static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
static inline void pmdp_invalidate(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
- __pmd_idte(address, pmdp);
+ pmdp_flush_direct(vma->vm_mm, address, pmdp);
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1580,7 +1638,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
pmd_t pmd = *pmdp;
if (pmd_write(pmd)) {
- __pmd_idte(address, pmdp);
+ pmdp_flush_direct(mm, address, pmdp);
set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
}
}
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0a876bc543d..dc5fc4f90e5 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -79,6 +79,7 @@ struct thread_struct {
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
+ unsigned int gmap_pfault; /* signal of a pending guest pfault */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 406f3a1e63e..b31b22dba94 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -68,6 +68,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
#define MACHINE_FLAG_TE (1UL << 15)
#define MACHINE_FLAG_RRBM (1UL << 16)
+#define MACHINE_FLAG_TLB_LC (1UL << 17)
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -90,6 +91,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
#define MACHINE_HAS_TOPOLOGY (0)
#define MACHINE_HAS_TE (0)
#define MACHINE_HAS_RRBM (0)
+#define MACHINE_HAS_TLB_LC (0)
#else /* CONFIG_64BIT */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
@@ -102,6 +104,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM)
+#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
#endif /* CONFIG_64BIT */
/*
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 29c81f82705..e759181357f 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -132,6 +132,7 @@ static inline void restore_access_regs(unsigned int *acrs)
update_cr_regs(next); \
} \
prev = __switch_to(prev,next); \
+ update_primary_asce(current); \
} while (0)
#define finish_arch_switch(prev) do { \
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index cd29d2f4e4f..777687055e7 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -12,7 +12,7 @@
#ifndef _ASM_SYSCALL_H
#define _ASM_SYSCALL_H 1
-#include <linux/audit.h>
+#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/ptrace.h>
@@ -89,11 +89,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr2 = args[0];
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_31BIT))
+ if (test_tsk_thread_flag(current, TIF_31BIT))
return AUDIT_ARCH_S390;
#endif
return sizeof(long) == 8 ? AUDIT_ARCH_S390X : AUDIT_ARCH_S390;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 3ccd71b9034..50630e6a35d 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -82,6 +82,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_TLB_WAIT 4 /* wait for TLB flush completion */
+#define TIF_ASCE 5 /* primary asce needs fixup / uaccess */
#define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */
#define TIF_MCCK_PENDING 7 /* machine check handling is pending */
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
@@ -99,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_TLB_WAIT (1<<TIF_TLB_WAIT)
+#define _TIF_ASCE (1<<TIF_ASCE)
#define _TIF_PER_TRAP (1<<TIF_PER_TRAP)
#define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING)
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 2cb846c4b37..c544b6f05d9 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -57,8 +57,6 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb,
tlb->end = end;
tlb->fullmm = !(start | (end+1));
tlb->batch = NULL;
- if (tlb->fullmm)
- __tlb_flush_mm(mm);
}
static inline void tlb_flush_mmu(struct mmu_gather *tlb)
@@ -96,9 +94,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long address)
{
- if (!tlb->fullmm)
- return page_table_free_rcu(tlb, (unsigned long *) pte);
- page_table_free(tlb->mm, (unsigned long *) pte);
+ page_table_free_rcu(tlb, (unsigned long *) pte);
}
/*
@@ -114,9 +110,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
#ifdef CONFIG_64BIT
if (tlb->mm->context.asce_limit <= (1UL << 31))
return;
- if (!tlb->fullmm)
- return tlb_remove_table(tlb, pmd);
- crst_table_free(tlb->mm, (unsigned long *) pmd);
+ tlb_remove_table(tlb, pmd);
#endif
}
@@ -133,9 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#ifdef CONFIG_64BIT
if (tlb->mm->context.asce_limit <= (1UL << 42))
return;
- if (!tlb->fullmm)
- return tlb_remove_table(tlb, pud);
- crst_table_free(tlb->mm, (unsigned long *) pud);
+ tlb_remove_table(tlb, pud);
#endif
}
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index f9fef0425fe..16c9c88658c 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -7,19 +7,41 @@
#include <asm/pgalloc.h>
/*
- * Flush all tlb entries on the local cpu.
+ * Flush all TLB entries on the local CPU.
*/
static inline void __tlb_flush_local(void)
{
asm volatile("ptlb" : : : "memory");
}
-#ifdef CONFIG_SMP
/*
- * Flush all tlb entries on all cpus.
+ * Flush TLB entries for a specific ASCE on all CPUs
*/
+static inline void __tlb_flush_idte(unsigned long asce)
+{
+ /* Global TLB flush for the mm */
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,0"
+ : : "a" (2048), "a" (asce) : "cc");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on the local CPU
+ */
+static inline void __tlb_flush_idte_local(unsigned long asce)
+{
+ /* Local TLB flush for the mm */
+ asm volatile(
+ " .insn rrf,0xb98e0000,0,%0,%1,1"
+ : : "a" (2048), "a" (asce) : "cc");
+}
+
+#ifdef CONFIG_SMP
void smp_ptlb_all(void);
+/*
+ * Flush all TLB entries on all CPUs.
+ */
static inline void __tlb_flush_global(void)
{
register unsigned long reg2 asm("2");
@@ -42,36 +64,89 @@ static inline void __tlb_flush_global(void)
: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
}
+/*
+ * Flush TLB entries for a specific mm on all CPUs (in case gmap is used
+ * this implicates multiple ASCEs!).
+ */
static inline void __tlb_flush_full(struct mm_struct *mm)
{
- cpumask_t local_cpumask;
-
preempt_disable();
- /*
- * If the process only ran on the local cpu, do a local flush.
- */
- cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id()));
- if (cpumask_equal(mm_cpumask(mm), &local_cpumask))
+ atomic_add(0x10000, &mm->context.attach_count);
+ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+ /* Local TLB flush */
__tlb_flush_local();
- else
+ } else {
+ /* Global TLB flush */
__tlb_flush_global();
+ /* Reset TLB flush mask */
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_copy(mm_cpumask(mm),
+ &mm->context.cpu_attach_mask);
+ }
+ atomic_sub(0x10000, &mm->context.attach_count);
preempt_enable();
}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs.
+ */
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+{
+ int active, count;
+
+ preempt_disable();
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+ __tlb_flush_idte_local(asce);
+ } else {
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte(asce);
+ else
+ __tlb_flush_global();
+ /* Reset TLB flush mask */
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_copy(mm_cpumask(mm),
+ &mm->context.cpu_attach_mask);
+ }
+ atomic_sub(0x10000, &mm->context.attach_count);
+ preempt_enable();
+}
+
+static inline void __tlb_flush_kernel(void)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_idte((unsigned long) init_mm.pgd |
+ init_mm.context.asce_bits);
+ else
+ __tlb_flush_global();
+}
#else
-#define __tlb_flush_full(mm) __tlb_flush_local()
#define __tlb_flush_global() __tlb_flush_local()
-#endif
+#define __tlb_flush_full(mm) __tlb_flush_local()
/*
- * Flush all tlb entries of a page table on all cpus.
+ * Flush TLB entries for a specific ASCE on all CPUs.
*/
-static inline void __tlb_flush_idte(unsigned long asce)
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
{
- asm volatile(
- " .insn rrf,0xb98e0000,0,%0,%1,0"
- : : "a" (2048), "a" (asce) : "cc" );
+ if (MACHINE_HAS_TLB_LC)
+ __tlb_flush_idte_local(asce);
+ else
+ __tlb_flush_local();
}
+static inline void __tlb_flush_kernel(void)
+{
+ if (MACHINE_HAS_TLB_LC)
+ __tlb_flush_idte_local((unsigned long) init_mm.pgd |
+ init_mm.context.asce_bits);
+ else
+ __tlb_flush_local();
+}
+#endif
+
static inline void __tlb_flush_mm(struct mm_struct * mm)
{
/*
@@ -80,7 +155,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
* only ran on the local cpu.
*/
if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
- __tlb_flush_idte((unsigned long) mm->pgd |
+ __tlb_flush_asce(mm, (unsigned long) mm->pgd |
mm->context.asce_bits);
else
__tlb_flush_full(mm);
@@ -130,7 +205,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
- __tlb_flush_mm(&init_mm);
+ __tlb_flush_kernel();
}
#endif /* _S390_TLBFLUSH_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 4133b3f72fb..1be64a1506d 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -92,8 +92,6 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
#define ARCH_HAS_SORT_EXTABLE
#define ARCH_HAS_SEARCH_EXTABLE
-int __handle_fault(unsigned long, unsigned long, int);
-
/**
* __copy_from_user: - Copy a block of data from user space, with less checking.
* @to: Destination address, in kernel space.
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index d25da598ec6..c003c6a73b1 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -16,6 +16,44 @@
#define __KVM_S390
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS 1
+#define KVM_DEV_FLIC_ENQUEUE 2
+#define KVM_DEV_FLIC_CLEAR_IRQS 3
+#define KVM_DEV_FLIC_APF_ENABLE 4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Lets round up to 8192 pages.
+ */
+#define KVM_S390_MAX_FLOAT_IRQS 266250
+#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
+
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 pad;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
@@ -57,4 +95,9 @@ struct kvm_sync_regs {
#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
#endif
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e4c99a18365..cc10cdd4d6a 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -136,6 +136,7 @@ int main(void)
DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
+ DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 3a414c0f93e..c0b03c28d15 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -378,9 +378,12 @@ static int __init cache_init(void)
if (!test_facility(34))
return 0;
cache_build_info();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
cache_add_cpu(cpu);
- hotcpu_notifier(cache_hotplug, 0);
+ __hotcpu_notifier(cache_hotplug, 0);
+ cpu_notifier_register_done();
return 0;
}
device_initcall(cache_init);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 6b594439cca..a734f3585ce 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -386,6 +386,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
if (test_facility(66))
S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
+ if (test_facility(51))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
#endif
}
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 526d3735ed2..1662038516c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -38,9 +38,9 @@ __PT_R14 = __PT_GPRS + 56
__PT_R15 = __PT_GPRS + 60
_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_PER_TRAP )
+ _TIF_MCCK_PENDING | _TIF_PER_TRAP | _TIF_ASCE)
_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
+ _TIF_MCCK_PENDING | _TIF_ASCE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT)
@@ -241,6 +241,8 @@ sysc_work:
jo sysc_sigpending
tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
jo sysc_notify_resume
+ tm __TI_flags+3(%r12),_TIF_ASCE
+ jo sysc_uaccess
j sysc_return # beware of critical section cleanup
#
@@ -260,6 +262,14 @@ sysc_mcck_pending:
br %r1 # TIF bit will be cleared by handler
#
+# _TIF_ASCE is set, load user space asce
+#
+sysc_uaccess:
+ ni __TI_flags+3(%r12),255-_TIF_ASCE
+ lctl %c1,%c1,__LC_USER_ASCE # load primary asce
+ j sysc_return
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
sysc_sigpending:
@@ -522,6 +532,8 @@ io_work_tif:
jo io_sigpending
tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME
jo io_notify_resume
+ tm __TI_flags+3(%r12),_TIF_ASCE
+ jo io_uaccess
j io_return # beware of critical section cleanup
#
@@ -535,6 +547,14 @@ io_mcck_pending:
j io_return
#
+# _TIF_ASCE is set, load user space asce
+#
+io_uaccess:
+ ni __TI_flags+3(%r12),255-_TIF_ASCE
+ lctl %c1,%c1,__LC_USER_ASCE # load primary asce
+ j io_return
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
io_reschedule:
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index e09dbe5f290..5963e43618b 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -43,9 +43,9 @@ STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING | _TIF_PER_TRAP )
+ _TIF_MCCK_PENDING | _TIF_PER_TRAP | _TIF_ASCE)
_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_MCCK_PENDING)
+ _TIF_MCCK_PENDING | _TIF_ASCE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT)
@@ -275,6 +275,8 @@ sysc_work:
jo sysc_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo sysc_notify_resume
+ tm __TI_flags+7(%r12),_TIF_ASCE
+ jo sysc_uaccess
j sysc_return # beware of critical section cleanup
#
@@ -292,6 +294,14 @@ sysc_mcck_pending:
jg s390_handle_mcck # TIF bit will be cleared by handler
#
+# _TIF_ASCE is set, load user space asce
+#
+sysc_uaccess:
+ ni __TI_flags+7(%r12),255-_TIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j sysc_return
+
+#
# _TIF_SIGPENDING is set, call do_signal
#
sysc_sigpending:
@@ -559,6 +569,8 @@ io_work_tif:
jo io_sigpending
tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
jo io_notify_resume
+ tm __TI_flags+7(%r12),_TIF_ASCE
+ jo io_uaccess
j io_return # beware of critical section cleanup
#
@@ -571,6 +583,14 @@ io_mcck_pending:
j io_return
#
+# _TIF_ASCE is set, load user space asce
+#
+io_uaccess:
+ ni __TI_flags+7(%r12),255-_TIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j io_return
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
io_reschedule:
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 224db03e951..54d6493c4a5 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -130,9 +130,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return 0;
}
-int __init ftrace_dyn_arch_init(void *data)
+int __init ftrace_dyn_arch_init(void)
{
- *(unsigned long *) data = 0;
return 0;
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index a770be97db4..c7463aa0014 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -85,6 +85,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
[IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
[IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
[IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
[NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
[CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
};
@@ -206,7 +207,7 @@ static inline int ext_hash(u16 code)
return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
}
-int register_external_interrupt(u16 code, ext_int_handler_t handler)
+int register_external_irq(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
unsigned long flags;
@@ -224,9 +225,9 @@ int register_external_interrupt(u16 code, ext_int_handler_t handler)
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
-EXPORT_SYMBOL(register_external_interrupt);
+EXPORT_SYMBOL(register_external_irq);
-int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
unsigned long flags;
@@ -242,7 +243,7 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
-EXPORT_SYMBOL(unregister_external_interrupt);
+EXPORT_SYMBOL(unregister_external_irq);
static irqreturn_t do_ext_interrupt(int irq, void *dummy)
{
@@ -252,7 +253,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy)
int index;
ext_code = *(struct ext_code *) &regs->int_code;
- if (ext_code.code != 0x1004)
+ if (ext_code.code != EXT_IRQ_CLK_COMP)
__get_cpu_var(s390_idle).nohz_delay = 1;
index = ext_hash(ext_code.code);
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index f51214c0485..ea75d011a6f 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -673,7 +673,8 @@ static int __init cpumf_pmu_init(void)
ctl_clear_bit(0, 48);
/* register handler for measurement-alert interruptions */
- rc = register_external_interrupt(0x1407, cpumf_measurement_alert);
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
if (rc) {
pr_err("Registering for CPU-measurement alerts "
"failed with rc=%i\n", rc);
@@ -684,7 +685,8 @@ static int __init cpumf_pmu_init(void)
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
if (rc) {
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
- unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
goto out;
}
perf_cpu_notifier(cpumf_pmu_notifier);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 6c0d29827cb..ea0c7b2ef03 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1621,7 +1621,8 @@ static int __init init_cpum_sampling_pmu(void)
pr_err("Registering for s390dbf failed\n");
debug_register_view(sfdbg, &debug_sprintf_view);
- err = register_external_interrupt(0x1407, cpumf_measurement_alert);
+ err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
if (err) {
pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
goto out;
@@ -1630,7 +1631,8 @@ static int __init init_cpum_sampling_pmu(void)
err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
if (err) {
pr_cpumsf_err(RS_INIT_FAILURE_PERF);
- unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
goto out;
}
perf_cpu_notifier(cpumf_pmu_notifier);
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index d817cce7e72..26b4ae96fdd 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -138,7 +138,8 @@ static int __init runtime_instr_init(void)
return 0;
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
- rc = register_external_interrupt(0x1407, runtime_instr_int_handler);
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ runtime_instr_int_handler);
if (rc)
irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
else
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
index 29bd7bec417..a41f2c99dcc 100644
--- a/arch/s390/kernel/sclp.S
+++ b/arch/s390/kernel/sclp.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/irq.h>
LC_EXT_NEW_PSW = 0x58 # addr of ext int handler
LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit
@@ -73,9 +74,9 @@ _sclp_wait_int:
lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt
.LwaitS1:
lh %r7,LC_EXT_INT_CODE
- chi %r7,0x1004 # timeout?
+ chi %r7,EXT_IRQ_CLK_COMP # timeout?
je .LtimeoutS1
- chi %r7,0x2401 # service int?
+ chi %r7,EXT_IRQ_SERVICE_SIG # service int?
jne .LloopS1
sr %r2,%r2
l %r3,LC_EXT_INT_PARAM
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8827883310d..512ce1cde2a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -236,6 +236,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
struct _lowcore *lc = pcpu->lowcore;
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
atomic_inc(&init_mm.context.attach_count);
lc->cpu_nr = cpu;
lc->percpu_offset = __per_cpu_offset[cpu];
@@ -760,6 +763,9 @@ void __cpu_die(unsigned int cpu)
cpu_relax();
pcpu_free_lowcore(pcpu);
atomic_dec(&init_mm.context.attach_count);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
}
void __noreturn cpu_die(void)
@@ -785,10 +791,10 @@ void __init smp_fill_possible_mask(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
/* request the 0x1201 emergency signal external interrupt */
- if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
+ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
/* request the 0x1202 external call external interrupt */
- if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
+ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
smp_detect_cpus();
}
@@ -1057,19 +1063,24 @@ static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
static int __init s390_smp_init(void)
{
- int cpu, rc;
+ int cpu, rc = 0;
- hotcpu_notifier(smp_cpu_notify, 0);
#ifdef CONFIG_HOTPLUG_CPU
rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
if (rc)
return rc;
#endif
+ cpu_notifier_register_begin();
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)
- return rc;
+ goto out;
}
- return 0;
+
+ __hotcpu_notifier(smp_cpu_notify, 0);
+
+out:
+ cpu_notifier_register_done();
+ return rc;
}
subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index dd95f163162..386d37a228b 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -262,11 +262,11 @@ void __init time_init(void)
stp_reset();
/* request the clock comparator external interrupt */
- if (register_external_interrupt(0x1004, clock_comparator_interrupt))
- panic("Couldn't request external interrupt 0x1004");
+ if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+ panic("Couldn't request external interrupt 0x1004");
/* request the timing alert external interrupt */
- if (register_external_interrupt(0x1406, timing_alert_interrupt))
+ if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
if (clocksource_register(&clocksource_tod) != 0)
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 70b46eacf8e..10d529ac982 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,10 @@ config KVM
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD
+ select KVM_ASYNC_PF
+ select KVM_ASYNC_PF_SYNC
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 40b4c6470f8..d3adb37e93a 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 6f9cfa50037..08dfc839a6c 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -18,6 +18,7 @@
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
+#include "gaccess.h"
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
@@ -47,6 +48,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
return 0;
}
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{
+ struct prs_parm {
+ u16 code;
+ u16 subcode;
+ u16 parm_len;
+ u16 parm_version;
+ u64 token_addr;
+ u64 select_mask;
+ u64 compare_mask;
+ u64 zarch;
+ };
+ struct prs_parm parm;
+ int rc;
+ u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+ u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+ unsigned long hva_token = KVM_HVA_ERR_BAD;
+
+ if (vcpu->run->s.regs.gprs[rx] & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (parm.subcode) {
+ case 0: /* TOKEN */
+ if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+ /*
+ * If the pagefault handshake is already activated,
+ * the token must not be changed. We have to return
+ * decimal 8 instead, as mandated in SC24-6084.
+ */
+ vcpu->run->s.regs.gprs[ry] = 8;
+ return 0;
+ }
+
+ if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+ parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
+ if (kvm_is_error_hva(hva_token))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ vcpu->arch.pfault_token = parm.token_addr;
+ vcpu->arch.pfault_select = parm.select_mask;
+ vcpu->arch.pfault_compare = parm.compare_mask;
+ vcpu->run->s.regs.gprs[ry] = 0;
+ rc = 0;
+ break;
+ case 1: /*
+ * CANCEL
+ * Specification allows to let already pending tokens survive
+ * the cancel, therefore to reduce code complexity, we assume
+ * all outstanding tokens are already pending.
+ */
+ if (parm.token_addr || parm.select_mask ||
+ parm.compare_mask || parm.zarch)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ vcpu->run->s.regs.gprs[ry] = 0;
+ /*
+ * If the pfault handling was not established or is already
+ * canceled SC24-6084 requests to return decimal 4.
+ */
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ vcpu->run->s.regs.gprs[ry] = 4;
+ else
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+ rc = 0;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+
+ return rc;
+}
+
static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@ -85,6 +167,10 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
switch (subcode) {
+ case 0:
+ case 1:
+ page_table_reset_pgste(current->mm, 0, TASK_SIZE);
+ return -EOPNOTSUPP;
case 3:
vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
page_table_reset_pgste(current->mm, 0, TASK_SIZE);
@@ -153,6 +239,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
return __diag_time_slice_end(vcpu);
case 0x9c:
return __diag_time_slice_end_directed(vcpu);
+ case 0x258:
+ return __diag_page_ref_service(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
case 0x500:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5f79d2d79ca..200a8f9390b 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -13,6 +13,7 @@
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <asm/asm-offsets.h>
@@ -31,7 +32,7 @@ static int is_ioint(u64 type)
return ((type & 0xfffe0000u) != 0xfffe0000u);
}
-static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+int psw_extint_disabled(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
}
@@ -78,11 +79,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
return 1;
return 0;
case KVM_S390_INT_SERVICE:
- if (psw_extint_disabled(vcpu))
- return 0;
- if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
- return 1;
- return 0;
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
if (psw_extint_disabled(vcpu))
return 0;
@@ -117,14 +115,12 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
- BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
- BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
}
@@ -150,6 +146,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
case KVM_S390_INT_EXTERNAL_CALL:
case KVM_S390_INT_EMERGENCY:
case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
if (psw_extint_disabled(vcpu))
__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
@@ -223,6 +221,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
rc |= put_guest(vcpu, inti->ext.ext_params,
(u32 __user *)__LC_EXT_PARAMS);
break;
+ case KVM_S390_INT_PFAULT_INIT:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+ inti->ext.ext_params2);
+ rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
+ rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
+ rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ rc |= put_guest(vcpu, inti->ext.ext_params2,
+ (u64 __user *) __LC_EXT_PARAMS2);
+ break;
+ case KVM_S390_INT_PFAULT_DONE:
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+ inti->ext.ext_params2);
+ rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
+ rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
+ rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ rc |= put_guest(vcpu, inti->ext.ext_params2,
+ (u64 __user *) __LC_EXT_PARAMS2);
+ break;
case KVM_S390_INT_VIRTIO:
VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
inti->ext.ext_params, inti->ext.ext_params2);
@@ -357,7 +379,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
return 1;
}
-static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -482,11 +504,26 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+ vcpu->preempted = true;
tasklet_schedule(&vcpu->arch.tasklet);
return HRTIMER_NORESTART;
}
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
+
+ spin_lock_bh(&li->lock);
+ list_for_each_entry_safe(inti, n, &li->list, list) {
+ list_del(&inti->list);
+ kfree(inti);
+ }
+ atomic_set(&li->active, 0);
+ spin_unlock_bh(&li->lock);
+}
+
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -528,6 +565,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
list_for_each_entry_safe(inti, n, &fi->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
+ fi->irq_count--;
deliver = 1;
break;
}
@@ -583,6 +621,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
if ((inti->type == KVM_S390_MCHK) &&
__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
+ fi->irq_count--;
deliver = 1;
break;
}
@@ -650,8 +689,10 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
inti = iter;
break;
}
- if (inti)
+ if (inti) {
list_del_init(&inti->list);
+ fi->irq_count--;
+ }
if (list_empty(&fi->list))
atomic_set(&fi->active, 0);
spin_unlock(&fi->lock);
@@ -659,53 +700,101 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
return inti;
}
-int kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int)
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
{
struct kvm_s390_local_interrupt *li;
struct kvm_s390_float_interrupt *fi;
- struct kvm_s390_interrupt_info *inti, *iter;
+ struct kvm_s390_interrupt_info *iter;
+ struct kvm_vcpu *dst_vcpu = NULL;
int sigcpu;
+ int rc = 0;
+
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+ rc = -EINVAL;
+ goto unlock_fi;
+ }
+ fi->irq_count++;
+ if (!is_ioint(inti->type)) {
+ list_add_tail(&inti->list, &fi->list);
+ } else {
+ u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+
+ /* Keep I/O interrupts sorted in isc order. */
+ list_for_each_entry(iter, &fi->list, list) {
+ if (!is_ioint(iter->type))
+ continue;
+ if (int_word_to_isc_bits(iter->io.io_int_word)
+ <= isc_bits)
+ continue;
+ break;
+ }
+ list_add_tail(&inti->list, &iter->list);
+ }
+ atomic_set(&fi->active, 1);
+ sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+ if (sigcpu == KVM_MAX_VCPUS) {
+ do {
+ sigcpu = fi->next_rr_cpu++;
+ if (sigcpu == KVM_MAX_VCPUS)
+ sigcpu = fi->next_rr_cpu = 0;
+ } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
+ }
+ dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+ li = &dst_vcpu->arch.local_int;
+ spin_lock_bh(&li->lock);
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(li->wq))
+ wake_up_interruptible(li->wq);
+ kvm_get_vcpu(kvm, sigcpu)->preempted = true;
+ spin_unlock_bh(&li->lock);
+unlock_fi:
+ spin_unlock(&fi->lock);
+ mutex_unlock(&kvm->lock);
+ return rc;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int)
+{
+ struct kvm_s390_interrupt_info *inti;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
- switch (s390int->type) {
+ inti->type = s390int->type;
+ switch (inti->type) {
case KVM_S390_INT_VIRTIO:
VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
s390int->parm, s390int->parm64);
- inti->type = s390int->type;
inti->ext.ext_params = s390int->parm;
inti->ext.ext_params2 = s390int->parm64;
break;
case KVM_S390_INT_SERVICE:
VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
- inti->type = s390int->type;
inti->ext.ext_params = s390int->parm;
break;
- case KVM_S390_PROGRAM_INT:
- case KVM_S390_SIGP_STOP:
- case KVM_S390_INT_EXTERNAL_CALL:
- case KVM_S390_INT_EMERGENCY:
- kfree(inti);
- return -EINVAL;
+ case KVM_S390_INT_PFAULT_DONE:
+ inti->type = s390int->type;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
case KVM_S390_MCHK:
VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
s390int->parm64);
- inti->type = s390int->type;
inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
inti->mchk.mcic = s390int->parm64;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- if (s390int->type & IOINT_AI_MASK)
+ if (inti->type & IOINT_AI_MASK)
VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
else
VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
s390int->type & IOINT_CSSID_MASK,
s390int->type & IOINT_SSID_MASK,
s390int->type & IOINT_SCHID_MASK);
- inti->type = s390int->type;
inti->io.subchannel_id = s390int->parm >> 16;
inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
inti->io.io_int_parm = s390int->parm64 >> 32;
@@ -718,43 +807,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
2);
- mutex_lock(&kvm->lock);
- fi = &kvm->arch.float_int;
- spin_lock(&fi->lock);
- if (!is_ioint(inti->type))
- list_add_tail(&inti->list, &fi->list);
- else {
- u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
-
- /* Keep I/O interrupts sorted in isc order. */
- list_for_each_entry(iter, &fi->list, list) {
- if (!is_ioint(iter->type))
- continue;
- if (int_word_to_isc_bits(iter->io.io_int_word)
- <= isc_bits)
- continue;
- break;
- }
- list_add_tail(&inti->list, &iter->list);
- }
- atomic_set(&fi->active, 1);
- sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
- if (sigcpu == KVM_MAX_VCPUS) {
- do {
- sigcpu = fi->next_rr_cpu++;
- if (sigcpu == KVM_MAX_VCPUS)
- sigcpu = fi->next_rr_cpu = 0;
- } while (fi->local_int[sigcpu] == NULL);
- }
- li = fi->local_int[sigcpu];
- spin_lock_bh(&li->lock);
- atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
- spin_unlock_bh(&li->lock);
- spin_unlock(&fi->lock);
- mutex_unlock(&kvm->lock);
- return 0;
+ return __inject_vm(kvm, inti);
}
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -814,6 +867,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
inti->type = s390int->type;
inti->mchk.mcic = s390int->parm64;
break;
+ case KVM_S390_INT_PFAULT_INIT:
+ inti->type = s390int->type;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
case KVM_S390_INT_VIRTIO:
case KVM_S390_INT_SERVICE:
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -837,7 +894,528 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
+ vcpu->preempted = true;
spin_unlock_bh(&li->lock);
mutex_unlock(&vcpu->kvm->lock);
return 0;
}
+
+static void clear_floating_interrupts(struct kvm *kvm)
+{
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *n, *inti = NULL;
+
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+ list_for_each_entry_safe(inti, n, &fi->list, list) {
+ list_del(&inti->list);
+ kfree(inti);
+ }
+ fi->irq_count = 0;
+ atomic_set(&fi->active, 0);
+ spin_unlock(&fi->lock);
+ mutex_unlock(&kvm->lock);
+}
+
+static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
+ u8 *addr)
+{
+ struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+ struct kvm_s390_irq irq = {0};
+
+ irq.type = inti->type;
+ switch (inti->type) {
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ irq.u.ext = inti->ext;
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ irq.u.io = inti->io;
+ break;
+ case KVM_S390_MCHK:
+ irq.u.mchk = inti->mchk;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(uptr, &irq, sizeof(irq)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+{
+ struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_float_interrupt *fi;
+ int ret = 0;
+ int n = 0;
+
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock(&fi->lock);
+
+ list_for_each_entry(inti, &fi->list, list) {
+ if (len < sizeof(struct kvm_s390_irq)) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ break;
+ }
+ ret = copy_irq_to_user(inti, buf);
+ if (ret)
+ break;
+ buf += sizeof(struct kvm_s390_irq);
+ len -= sizeof(struct kvm_s390_irq);
+ n++;
+ }
+
+ spin_unlock(&fi->lock);
+ mutex_unlock(&kvm->lock);
+
+ return ret < 0 ? ret : n;
+}
+
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r;
+
+ switch (attr->group) {
+ case KVM_DEV_FLIC_GET_ALL_IRQS:
+ r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+ attr->attr);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+ u64 addr)
+{
+ struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+ void *target = NULL;
+ void __user *source;
+ u64 size;
+
+ if (get_user(inti->type, (u64 __user *)addr))
+ return -EFAULT;
+
+ switch (inti->type) {
+ case KVM_S390_INT_PFAULT_INIT:
+ case KVM_S390_INT_PFAULT_DONE:
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ target = (void *) &inti->ext;
+ source = &uptr->u.ext;
+ size = sizeof(inti->ext);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ target = (void *) &inti->io;
+ source = &uptr->u.io;
+ size = sizeof(inti->io);
+ break;
+ case KVM_S390_MCHK:
+ target = (void *) &inti->mchk;
+ source = &uptr->u.mchk;
+ size = sizeof(inti->mchk);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_from_user(target, source, size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int enqueue_floating_irq(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int r = 0;
+ int len = attr->attr;
+
+ if (len % sizeof(struct kvm_s390_irq) != 0)
+ return -EINVAL;
+ else if (len > KVM_S390_FLIC_MAX_BUFFER)
+ return -EINVAL;
+
+ while (len >= sizeof(struct kvm_s390_irq)) {
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ r = copy_irq_from_user(inti, attr->addr);
+ if (r) {
+ kfree(inti);
+ return r;
+ }
+ r = __inject_vm(dev->kvm, inti);
+ if (r) {
+ kfree(inti);
+ return r;
+ }
+ len -= sizeof(struct kvm_s390_irq);
+ attr->addr += sizeof(struct kvm_s390_irq);
+ }
+
+ return r;
+}
+
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+ if (id >= MAX_S390_IO_ADAPTERS)
+ return NULL;
+ return kvm->arch.adapters[id];
+}
+
+static int register_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct s390_io_adapter *adapter;
+ struct kvm_s390_io_adapter adapter_info;
+
+ if (copy_from_user(&adapter_info,
+ (void __user *)attr->addr, sizeof(adapter_info)))
+ return -EFAULT;
+
+ if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
+ (dev->kvm->arch.adapters[adapter_info.id] != NULL))
+ return -EINVAL;
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&adapter->maps);
+ init_rwsem(&adapter->maps_lock);
+ atomic_set(&adapter->nr_maps, 0);
+ adapter->id = adapter_info.id;
+ adapter->isc = adapter_info.isc;
+ adapter->maskable = adapter_info.maskable;
+ adapter->masked = false;
+ adapter->swap = adapter_info.swap;
+ dev->kvm->arch.adapters[adapter->id] = adapter;
+
+ return 0;
+}
+
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+ int ret;
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+ if (!adapter || !adapter->maskable)
+ return -EINVAL;
+ ret = adapter->masked;
+ adapter->masked = masked;
+ return ret;
+}
+
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map;
+ int ret;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&map->list);
+ map->guest_addr = addr;
+ map->addr = gmap_translate(addr, kvm->arch.gmap);
+ if (map->addr == -EFAULT) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret != 1);
+ down_write(&adapter->maps_lock);
+ if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
+ list_add_tail(&map->list, &adapter->maps);
+ ret = 0;
+ } else {
+ put_page(map->page);
+ ret = -EINVAL;
+ }
+ up_write(&adapter->maps_lock);
+out:
+ if (ret)
+ kfree(map);
+ return ret;
+}
+
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map, *tmp;
+ int found = 0;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ down_write(&adapter->maps_lock);
+ list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
+ if (map->guest_addr == addr) {
+ found = 1;
+ atomic_dec(&adapter->nr_maps);
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ break;
+ }
+ }
+ up_write(&adapter->maps_lock);
+
+ return found ? 0 : -EINVAL;
+}
+
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+ int i;
+ struct s390_map_info *map, *tmp;
+
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
+ if (!kvm->arch.adapters[i])
+ continue;
+ list_for_each_entry_safe(map, tmp,
+ &kvm->arch.adapters[i]->maps, list) {
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ }
+ kfree(kvm->arch.adapters[i]);
+ }
+}
+
+static int modify_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_io_adapter_req req;
+ struct s390_io_adapter *adapter;
+ int ret;
+
+ if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+ return -EFAULT;
+
+ adapter = get_io_adapter(dev->kvm, req.id);
+ if (!adapter)
+ return -EINVAL;
+ switch (req.type) {
+ case KVM_S390_IO_ADAPTER_MASK:
+ ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+ if (ret > 0)
+ ret = 0;
+ break;
+ case KVM_S390_IO_ADAPTER_MAP:
+ ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
+ break;
+ case KVM_S390_IO_ADAPTER_UNMAP:
+ ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r = 0;
+ unsigned int i;
+ struct kvm_vcpu *vcpu;
+
+ switch (attr->group) {
+ case KVM_DEV_FLIC_ENQUEUE:
+ r = enqueue_floating_irq(dev, attr);
+ break;
+ case KVM_DEV_FLIC_CLEAR_IRQS:
+ r = 0;
+ clear_floating_interrupts(dev->kvm);
+ break;
+ case KVM_DEV_FLIC_APF_ENABLE:
+ dev->kvm->arch.gmap->pfault_enabled = 1;
+ break;
+ case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+ dev->kvm->arch.gmap->pfault_enabled = 0;
+ /*
+ * Make sure no async faults are in transition when
+ * clearing the queues. So we don't need to worry
+ * about late coming workers.
+ */
+ synchronize_srcu(&dev->kvm->srcu);
+ kvm_for_each_vcpu(i, vcpu, dev->kvm)
+ kvm_clear_async_pf_completion_queue(vcpu);
+ break;
+ case KVM_DEV_FLIC_ADAPTER_REGISTER:
+ r = register_io_adapter(dev, attr);
+ break;
+ case KVM_DEV_FLIC_ADAPTER_MODIFY:
+ r = modify_io_adapter(dev, attr);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+static int flic_create(struct kvm_device *dev, u32 type)
+{
+ if (!dev)
+ return -EINVAL;
+ if (dev->kvm->arch.flic)
+ return -EINVAL;
+ dev->kvm->arch.flic = dev;
+ return 0;
+}
+
+static void flic_destroy(struct kvm_device *dev)
+{
+ dev->kvm->arch.flic = NULL;
+ kfree(dev);
+}
+
+/* s390 floating irq controller (flic) */
+struct kvm_device_ops kvm_flic_ops = {
+ .name = "kvm-flic",
+ .get_attr = flic_get_attr,
+ .set_attr = flic_set_attr,
+ .create = flic_create,
+ .destroy = flic_destroy,
+};
+
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+ unsigned long bit;
+
+ bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+ return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
+}
+
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+ u64 addr)
+{
+ struct s390_map_info *map;
+
+ if (!adapter)
+ return NULL;
+
+ list_for_each_entry(map, &adapter->maps, list) {
+ if (map->guest_addr == addr)
+ return map;
+ }
+ return NULL;
+}
+
+static int adapter_indicators_set(struct kvm *kvm,
+ struct s390_io_adapter *adapter,
+ struct kvm_s390_adapter_int *adapter_int)
+{
+ unsigned long bit;
+ int summary_set, idx;
+ struct s390_map_info *info;
+ void *map;
+
+ info = get_map_info(adapter, adapter_int->ind_addr);
+ if (!info)
+ return -1;
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ idx = srcu_read_lock(&kvm->srcu);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ info = get_map_info(adapter, adapter_int->summary_addr);
+ if (!info) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ return -1;
+ }
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+ adapter->swap);
+ summary_set = test_and_set_bit(bit, map);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ srcu_read_unlock(&kvm->srcu, idx);
+ return summary_set ? 0 : 1;
+}
+
+/*
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ int ret;
+ struct s390_io_adapter *adapter;
+
+ /* We're only interested in the 0->1 transition. */
+ if (!level)
+ return 0;
+ adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+ if (!adapter)
+ return -1;
+ down_read(&adapter->maps_lock);
+ ret = adapter_indicators_set(kvm, adapter, &e->adapter);
+ up_read(&adapter->maps_lock);
+ if ((ret > 0) && !adapter->masked) {
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_IO(1, 0, 0, 0),
+ .parm = 0,
+ .parm64 = (adapter->isc << 27) | 0x80000000,
+ };
+ ret = kvm_s390_inject_vm(kvm, &s390int);
+ if (ret == 0)
+ ret = 1;
+ }
+ return ret;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int ret;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_S390_ADAPTER:
+ e->set = set_adapter_int;
+ e->adapter.summary_addr = ue->u.adapter.summary_addr;
+ e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ e->adapter.summary_offset = ue->u.adapter.summary_offset;
+ e->adapter.ind_offset = ue->u.adapter.ind_offset;
+ e->adapter.adapter_id = ue->u.adapter.adapter_id;
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+ int irq_source_id, int level, bool line_status)
+{
+ return -EINVAL;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 00000000000..d98e4159643
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ return 1;
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 10b5db3c9bc..b3ecb8f5b6c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -153,11 +153,14 @@ int kvm_dev_ioctl_check_extension(long ext)
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
#endif
+ case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_ENABLE_CAP_VM:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
@@ -186,6 +189,25 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
return 0;
}
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_IRQCHIP:
+ kvm->arch.use_irqchip = 1;
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -203,6 +225,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_inject_vm(kvm, &s390int);
break;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+ break;
+ }
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_irq_routing_entry routing;
+
+ r = -EINVAL;
+ if (kvm->arch.use_irqchip) {
+ /* Set up dummy routing. */
+ memset(&routing, 0, sizeof(routing));
+ kvm_set_irq_routing(kvm, &routing, 0, 0);
+ r = 0;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -214,6 +256,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int rc;
char debug_name[16];
+ static unsigned long sca_offset;
rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
@@ -235,6 +278,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
goto out_err;
+ spin_lock(&kvm_lock);
+ sca_offset = (sca_offset + 16) & 0x7f0;
+ kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+ spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
@@ -255,9 +302,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.gmap)
goto out_nogmap;
kvm->arch.gmap->private = kvm;
+ kvm->arch.gmap->pfault_enabled = 0;
}
kvm->arch.css_support = 0;
+ kvm->arch.use_irqchip = 0;
return 0;
out_nogmap:
@@ -272,6 +321,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+ kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
@@ -320,11 +370,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
debug_unregister(kvm->arch.dbf);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
+ kvm_s390_destroy_adapters(kvm);
}
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap)
@@ -385,7 +438,11 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_local_irqs(vcpu);
}
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -466,11 +523,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
- spin_lock(&kvm->arch.float_int.lock);
- kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
- spin_unlock(&kvm->arch.float_int.lock);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
@@ -490,9 +544,7 @@ out:
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
- /* kvm common code refers to this, but never calls it */
- BUG();
- return 0;
+ return kvm_cpu_has_interrupt(vcpu);
}
void s390_vcpu_block(struct kvm_vcpu *vcpu)
@@ -568,6 +620,26 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
+ case KVM_REG_S390_PFTOKEN:
+ r = put_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = put_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = put_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = put_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = put_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
default:
break;
}
@@ -597,6 +669,26 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
+ case KVM_REG_S390_PFTOKEN:
+ r = get_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = get_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = get_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = get_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = get_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
default:
break;
}
@@ -715,10 +807,100 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
return 0;
}
+static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+{
+ long rc;
+ hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+ struct mm_struct *mm = current->mm;
+ down_read(&mm->mmap_sem);
+ rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+ up_read(&mm->mmap_sem);
+ return rc;
+}
+
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+ unsigned long token)
+{
+ struct kvm_s390_interrupt inti;
+ inti.parm64 = token;
+
+ if (start_token) {
+ inti.type = KVM_S390_INT_PFAULT_INIT;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+ } else {
+ inti.type = KVM_S390_INT_PFAULT_DONE;
+ WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+ }
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+ /* s390 will always inject the page directly */
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+ /*
+ * s390 will always inject the page directly,
+ * but we still want check_async_completion to cleanup
+ */
+ return true;
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+ hva_t hva;
+ struct kvm_arch_async_pf arch;
+ int rc;
+
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ return 0;
+ if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+ vcpu->arch.pfault_compare)
+ return 0;
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (kvm_cpu_has_interrupt(vcpu))
+ return 0;
+ if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+ return 0;
+ if (!vcpu->arch.gmap->pfault_enabled)
+ return 0;
+
+ hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+ if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+ return 0;
+
+ rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return rc;
+}
+
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
int rc, cpuflags;
+ /*
+ * On s390 notifications for arriving pages will be delivered directly
+ * to the guest but the house keeping for completed pfaults is
+ * handled outside the worker.
+ */
+ kvm_check_async_pf_completion(vcpu);
+
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
if (need_resched())
@@ -744,7 +926,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
- int rc;
+ int rc = -1;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
@@ -758,7 +940,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
rc = -EREMOTE;
- } else {
+
+ } else if (current->thread.gmap_pfault) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ current->thread.gmap_pfault = 0;
+ if (kvm_arch_setup_async_pf(vcpu) ||
+ (kvm_arch_fault_in_sync(vcpu) >= 0))
+ rc = 0;
+ }
+
+ if (rc == -1) {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
@@ -768,7 +959,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
if (rc == 0) {
if (kvm_is_ucontrol(vcpu->kvm))
- rc = -EOPNOTSUPP;
+ /* Don't exit for host interrupts. */
+ rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
else
rc = kvm_handle_sie_intercept(vcpu);
}
@@ -831,8 +1023,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
- BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
-
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
case KVM_EXIT_UNKNOWN:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 564514f410f..3c1e2274d9e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -129,6 +129,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -136,6 +137,7 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -161,4 +163,9 @@ bool kvm_enabled_cmma(void);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+/* implemented in interrupt.c */
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
+
#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index aacb6b12991..476e9e218f4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -396,15 +396,10 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
int cpus = 0;
int n;
- spin_lock(&fi->lock);
- for (n = 0; n < KVM_MAX_VCPUS; n++)
- if (fi->local_int[n])
- cpus++;
- spin_unlock(&fi->lock);
+ cpus = atomic_read(&vcpu->kvm->online_vcpus);
/* deal with other level 3 hypervisors */
if (stsi(mem, 3, 2, 2))
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 87c2b3a3bd3..26caeb530a7 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -23,29 +23,30 @@
static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
u64 *reg)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int cpuflags;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- if (fi->local_int[cpu_addr] == NULL)
- rc = SIGP_CC_NOT_OPERATIONAL;
- else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+
+ cpuflags = atomic_read(li->cpuflags);
+ if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
else {
*reg &= 0xffffffff00000000UL;
- if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_ECALL_PEND)
+ if (cpuflags & CPUSTAT_ECALL_PEND)
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
- if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_STOPPED)
+ if (cpuflags & CPUSTAT_STOPPED)
*reg |= SIGP_STATUS_STOPPED;
rc = SIGP_CC_STATUS_STORED;
}
- spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
return rc;
@@ -53,12 +54,13 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
struct kvm_s390_interrupt_info *inti;
- int rc;
+ struct kvm_vcpu *dst_vcpu = NULL;
- if (cpu_addr >= KVM_MAX_VCPUS)
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
@@ -68,13 +70,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
inti->type = KVM_S390_INT_EMERGENCY;
inti->emerg.code = vcpu->vcpu_id;
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- kfree(inti);
- goto unlock;
- }
+ li = &dst_vcpu->arch.local_int;
spin_lock_bh(&li->lock);
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
@@ -82,11 +78,9 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
if (waitqueue_active(li->wq))
wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
- rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
-unlock:
- spin_unlock(&fi->lock);
- return rc;
+
+ return SIGP_CC_ORDER_CODE_ACCEPTED;
}
static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
@@ -122,12 +116,13 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
struct kvm_s390_interrupt_info *inti;
- int rc;
+ struct kvm_vcpu *dst_vcpu = NULL;
- if (cpu_addr >= KVM_MAX_VCPUS)
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
@@ -137,13 +132,7 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
inti->type = KVM_S390_INT_EXTERNAL_CALL;
inti->extcall.code = vcpu->vcpu_id;
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- kfree(inti);
- goto unlock;
- }
+ li = &dst_vcpu->arch.local_int;
spin_lock_bh(&li->lock);
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
@@ -151,11 +140,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
if (waitqueue_active(li->wq))
wake_up_interruptible(li->wq);
spin_unlock_bh(&li->lock);
- rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
-unlock:
- spin_unlock(&fi->lock);
- return rc;
+
+ return SIGP_CC_ORDER_CODE_ACCEPTED;
}
static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
@@ -189,31 +176,26 @@ out:
static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- goto unlock;
- }
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
rc = __inject_sigp_stop(li, action);
-unlock:
- spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
/* If the CPU has already been stopped, we still have
* to save the status when doing stop-and-store. This
* has to be done after unlocking all spinlocks. */
- struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
rc = kvm_s390_store_status_unloaded(dst_vcpu,
KVM_S390_STORE_STATUS_NOADDR);
}
@@ -224,6 +206,8 @@ unlock:
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
{
int rc;
+ unsigned int i;
+ struct kvm_vcpu *v;
switch (parameter & 0xff) {
case 0:
@@ -231,6 +215,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
break;
case 1:
case 2:
+ kvm_for_each_vcpu(i, v, vcpu->kvm) {
+ v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(v);
+ }
+
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
break;
default:
@@ -242,12 +231,18 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
u64 *reg)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
- struct kvm_s390_local_interrupt *li = NULL;
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
struct kvm_s390_interrupt_info *inti;
int rc;
u8 tmp;
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+
/* make sure that the new value is valid memory */
address = address & 0x7fffe000u;
if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
@@ -261,18 +256,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
if (!inti)
return SIGP_CC_BUSY;
- spin_lock(&fi->lock);
- if (cpu_addr < KVM_MAX_VCPUS)
- li = fi->local_int[cpu_addr];
-
- if (li == NULL) {
- *reg &= 0xffffffff00000000UL;
- *reg |= SIGP_STATUS_INCORRECT_STATE;
- rc = SIGP_CC_STATUS_STORED;
- kfree(inti);
- goto out_fi;
- }
-
spin_lock_bh(&li->lock);
/* cpu must be in stopped state */
if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
@@ -295,8 +278,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
out_li:
spin_unlock_bh(&li->lock);
-out_fi:
- spin_unlock(&fi->lock);
return rc;
}
@@ -334,28 +315,26 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
u64 *reg)
{
+ struct kvm_s390_local_interrupt *li;
+ struct kvm_vcpu *dst_vcpu = NULL;
int rc;
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- if (fi->local_int[cpu_addr] == NULL)
- rc = SIGP_CC_NOT_OPERATIONAL;
- else {
- if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
- & CPUSTAT_RUNNING) {
- /* running */
- rc = SIGP_CC_ORDER_CODE_ACCEPTED;
- } else {
- /* not running */
- *reg &= 0xffffffff00000000UL;
- *reg |= SIGP_STATUS_NOT_RUNNING;
- rc = SIGP_CC_STATUS_STORED;
- }
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
+ if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ /* running */
+ rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ } else {
+ /* not running */
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_NOT_RUNNING;
+ rc = SIGP_CC_STATUS_STORED;
}
- spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
rc);
@@ -366,26 +345,22 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
/* Test whether the destination CPU is available and not busy */
static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+ struct kvm_vcpu *dst_vcpu = NULL;
if (cpu_addr >= KVM_MAX_VCPUS)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock(&fi->lock);
- li = fi->local_int[cpu_addr];
- if (li == NULL) {
- rc = SIGP_CC_NOT_OPERATIONAL;
- goto out;
- }
-
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ li = &dst_vcpu->arch.local_int;
spin_lock_bh(&li->lock);
if (li->action_bits & ACTION_STOP_ON_STOP)
rc = SIGP_CC_BUSY;
spin_unlock_bh(&li->lock);
-out:
- spin_unlock(&fi->lock);
+
return rc;
}
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 3db76b2daed..e8e7213d4cc 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -30,6 +30,52 @@
TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
__entry->pswmask, __entry->pswaddr, p_args)
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+ TP_PROTO(VCPU_PROTO_COMMON),
+ TP_ARGS(VCPU_ARGS_COMMON),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ ),
+ VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+ );
+
+TRACE_EVENT(kvm_s390_pfault_init,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+ );
+
+TRACE_EVENT(kvm_s390_pfault_done,
+ TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+ TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(long, pfault_token)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->pfault_token = pfault_token;
+ ),
+ VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+ );
+
/*
* Tracepoints for SIE entry and exit.
*/
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index e3fffe1dff5..c6d752e8bf2 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -2,7 +2,7 @@
# Makefile for s390-specific library files..
#
-lib-y += delay.o string.o uaccess_pt.o uaccess_mvcos.o find.o
+lib-y += delay.o string.o uaccess.o find.o
obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o
obj-$(CONFIG_64BIT) += mem64.o
lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
new file mode 100644
index 00000000000..23f866b4c7f
--- /dev/null
+++ b/arch/s390/lib/uaccess.c
@@ -0,0 +1,407 @@
+/*
+ * Standard user space access functions based on mvcp/mvcs and doing
+ * interesting things in the secondary space mode.
+ *
+ * Copyright IBM Corp. 2006,2014
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+
+#ifndef CONFIG_64BIT
+#define AHI "ahi"
+#define ALR "alr"
+#define CLR "clr"
+#define LHI "lhi"
+#define SLR "slr"
+#else
+#define AHI "aghi"
+#define ALR "algr"
+#define CLR "clgr"
+#define LHI "lghi"
+#define SLR "slgr"
+#endif
+
+static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+
+static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x81UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
+ "9: jz 7f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 4f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
+ "10:"SLR" %0,%4\n"
+ " "ALR" %2,%4\n"
+ "4:"LHI" %4,-1\n"
+ " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
+ " bras %3,6f\n" /* memset loop */
+ " xc 0(1,%2),0(%2)\n"
+ "5: xc 0(256,%2),0(%2)\n"
+ " la %2,256(%2)\n"
+ "6:"AHI" %4,-256\n"
+ " jnm 5b\n"
+ " ex %4,0(%3)\n"
+ " j 8f\n"
+ "7:"SLR" %0,%0\n"
+ "8:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ update_primary_asce(current);
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcp 0(%0,%2),0(%1),%3\n"
+ "10:jz 8f\n"
+ "1:"ALR" %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcp 0(%0,%2),0(%1),%3\n"
+ "11:jnz 1b\n"
+ " j 8f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " "LHI" %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "4: mvcp 0(%4,%2),0(%1),%3\n"
+ "12:"SLR" %0,%4\n"
+ " "ALR" %2,%4\n"
+ "5:"LHI" %4,-1\n"
+ " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
+ " bras %3,7f\n" /* memset loop */
+ " xc 0(1,%2),0(%2)\n"
+ "6: xc 0(256,%2),0(%2)\n"
+ " la %2,256(%2)\n"
+ "7:"AHI" %4,-256\n"
+ " jnm 6b\n"
+ " ex %4,0(%3)\n"
+ " j 9f\n"
+ "8:"SLR" %0,%0\n"
+ "9: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
+ EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_from_user_mvcos(to, from, n);
+ return copy_from_user_mvcp(to, from, n);
+}
+EXPORT_SYMBOL(__copy_from_user);
+
+static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ "6: jz 4f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
+ " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
+ "7:"SLR" %0,%4\n"
+ " j 5f\n"
+ "4:"SLR" %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
+ unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ update_primary_asce(current);
+ tmp1 = -256UL;
+ asm volatile(
+ " sacf 0\n"
+ "0: mvcs 0(%0,%1),0(%2),%3\n"
+ "7: jz 5f\n"
+ "1:"ALR" %0,%3\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "2: mvcs 0(%0,%1),0(%2),%3\n"
+ "8: jnz 1b\n"
+ " j 5f\n"
+ "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
+ " "LHI" %3,-4096\n"
+ " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
+ " "SLR" %4,%1\n"
+ " "CLR" %0,%4\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "4: mvcs 0(%4,%1),0(%2),%3\n"
+ "9:"SLR" %0,%4\n"
+ " j 6f\n"
+ "5:"SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+ EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_to_user_mvcos(to, from, n);
+ return copy_to_user_mvcs(to, from, n);
+}
+EXPORT_SYMBOL(__copy_to_user);
+
+static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810081UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ /* FIXME: copy with reduced length. */
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ " jz 2f\n"
+ "1:"ALR" %0,%3\n"
+ " "SLR" %1,%3\n"
+ " "SLR" %2,%3\n"
+ " j 0b\n"
+ "2:"SLR" %0,%0\n"
+ "3: \n"
+ EX_TABLE(0b,3b)
+ : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ unsigned long tmp1;
+
+ update_primary_asce(current);
+ asm volatile(
+ " sacf 256\n"
+ " "AHI" %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ "0:"AHI" %0,257\n"
+ "1: mvc 0(1,%1),0(%2)\n"
+ " la %1,1(%1)\n"
+ " la %2,1(%2)\n"
+ " "AHI" %0,-1\n"
+ " jnz 1b\n"
+ " j 5f\n"
+ "2: mvc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ "3:"AHI" %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,1b-0b(%3)\n"
+ "5: "SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ if (static_key_false(&have_mvcos))
+ return copy_in_user_mvcos(to, from, n);
+ return copy_in_user_mvc(to, from, n);
+}
+EXPORT_SYMBOL(__copy_in_user);
+
+static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0x810000UL;
+ unsigned long tmp1, tmp2;
+
+ tmp1 = -4096UL;
+ asm volatile(
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+ " jz 4f\n"
+ "1:"ALR" %0,%2\n"
+ " "SLR" %1,%2\n"
+ " j 0b\n"
+ "2: la %3,4095(%1)\n"/* %4 = to + 4095 */
+ " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */
+ " "SLR" %3,%1\n"
+ " "CLR" %0,%3\n" /* copy crosses next page boundary? */
+ " jnh 5f\n"
+ "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
+ " "SLR" %0,%3\n"
+ " j 5f\n"
+ "4:"SLR" %0,%0\n"
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+ : "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
+{
+ unsigned long tmp1, tmp2;
+
+ update_primary_asce(current);
+ asm volatile(
+ " sacf 256\n"
+ " "AHI" %0,-1\n"
+ " jo 5f\n"
+ " bras %3,3f\n"
+ " xc 0(1,%1),0(%1)\n"
+ "0:"AHI" %0,257\n"
+ " la %2,255(%1)\n" /* %2 = ptr + 255 */
+ " srl %2,12\n"
+ " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */
+ " "SLR" %2,%1\n"
+ " "CLR" %0,%2\n" /* clear crosses next page boundary? */
+ " jnh 5f\n"
+ " "AHI" %2,-1\n"
+ "1: ex %2,0(%3)\n"
+ " "AHI" %2,1\n"
+ " "SLR" %0,%2\n"
+ " j 5f\n"
+ "2: xc 0(256,%1),0(%1)\n"
+ " la %1,256(%1)\n"
+ "3:"AHI" %0,-256\n"
+ " jnm 2b\n"
+ "4: ex %0,0(%3)\n"
+ "5: "SLR" %0,%0\n"
+ "6: sacf 768\n"
+ EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+ : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
+ : : "cc", "memory");
+ return size;
+}
+
+unsigned long __clear_user(void __user *to, unsigned long size)
+{
+ if (static_key_false(&have_mvcos))
+ return clear_user_mvcos(to, size);
+ return clear_user_xc(to, size);
+}
+EXPORT_SYMBOL(__clear_user);
+
+static inline unsigned long strnlen_user_srst(const char __user *src,
+ unsigned long size)
+{
+ register unsigned long reg0 asm("0") = 0;
+ unsigned long tmp1, tmp2;
+
+ if (unlikely(!size))
+ return 0;
+ update_primary_asce(current);
+ asm volatile(
+ " la %2,0(%1)\n"
+ " la %3,0(%0,%1)\n"
+ " "SLR" %0,%0\n"
+ " sacf 256\n"
+ "0: srst %3,%2\n"
+ " jo 0b\n"
+ " la %0,1(%3)\n" /* strnlen_user results includes \0 */
+ " "SLR" %0,%1\n"
+ "1: sacf 768\n"
+ EX_TABLE(0b,1b)
+ : "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
+ : "d" (reg0) : "cc", "memory");
+ return size;
+}
+
+unsigned long __strnlen_user(const char __user *src, unsigned long size)
+{
+ update_primary_asce(current);
+ return strnlen_user_srst(src, size);
+}
+EXPORT_SYMBOL(__strnlen_user);
+
+long __strncpy_from_user(char *dst, const char __user *src, long size)
+{
+ size_t done, len, offset, len_str;
+
+ if (unlikely(size <= 0))
+ return 0;
+ done = 0;
+ do {
+ offset = (size_t)src & ~PAGE_MASK;
+ len = min(size - done, PAGE_SIZE - offset);
+ if (copy_from_user(dst, src, len))
+ return -EFAULT;
+ len_str = strnlen(dst, len);
+ done += len_str;
+ src += len_str;
+ dst += len_str;
+ } while ((len_str == len) && (done < size));
+ return done;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
+
+/*
+ * The "old" uaccess variant without mvcos can be enforced with the
+ * uaccess_primary kernel parameter. This is mainly for debugging purposes.
+ */
+static int uaccess_primary __initdata;
+
+static int __init parse_uaccess_pt(char *__unused)
+{
+ uaccess_primary = 1;
+ return 0;
+}
+early_param("uaccess_primary", parse_uaccess_pt);
+
+static int __init uaccess_init(void)
+{
+ if (IS_ENABLED(CONFIG_64BIT) && !uaccess_primary && test_facility(27))
+ static_key_slow_inc(&have_mvcos);
+ return 0;
+}
+early_initcall(uaccess_init);
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
deleted file mode 100644
index c7e0e81f4b4..00000000000
--- a/arch/s390/lib/uaccess.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright IBM Corp. 2007
- *
- */
-
-#ifndef __ARCH_S390_LIB_UACCESS_H
-#define __ARCH_S390_LIB_UACCESS_H
-
-unsigned long copy_from_user_pt(void *to, const void __user *from, unsigned long n);
-unsigned long copy_to_user_pt(void __user *to, const void *from, unsigned long n);
-unsigned long copy_in_user_pt(void __user *to, const void __user *from, unsigned long n);
-unsigned long clear_user_pt(void __user *to, unsigned long n);
-unsigned long strnlen_user_pt(const char __user *src, unsigned long count);
-long strncpy_from_user_pt(char *dst, const char __user *src, long count);
-
-#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c
deleted file mode 100644
index ae97b8df11a..00000000000
--- a/arch/s390/lib/uaccess_mvcos.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Optimized user space space access functions based on mvcos.
- *
- * Copyright IBM Corp. 2006
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/jump_label.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <asm/facility.h>
-#include <asm/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-#ifndef CONFIG_64BIT
-#define AHI "ahi"
-#define ALR "alr"
-#define CLR "clr"
-#define LHI "lhi"
-#define SLR "slr"
-#else
-#define AHI "aghi"
-#define ALR "algr"
-#define CLR "clgr"
-#define LHI "lghi"
-#define SLR "slgr"
-#endif
-
-static struct static_key have_mvcos = STATIC_KEY_INIT_TRUE;
-
-static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
- unsigned long size)
-{
- register unsigned long reg0 asm("0") = 0x81UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
- "9: jz 7f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
- " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 4f\n"
- "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
- "10:"SLR" %0,%4\n"
- " "ALR" %2,%4\n"
- "4:"LHI" %4,-1\n"
- " "ALR" %4,%0\n" /* copy remaining size, subtract 1 */
- " bras %3,6f\n" /* memset loop */
- " xc 0(1,%2),0(%2)\n"
- "5: xc 0(256,%2),0(%2)\n"
- " la %2,256(%2)\n"
- "6:"AHI" %4,-256\n"
- " jnm 5b\n"
- " ex %4,0(%3)\n"
- " j 8f\n"
- "7:"SLR" %0,%0\n"
- "8: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- if (static_key_true(&have_mvcos))
- return copy_from_user_mvcos(to, from, n);
- return copy_from_user_pt(to, from, n);
-}
-EXPORT_SYMBOL(__copy_from_user);
-
-static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
- unsigned long size)
-{
- register unsigned long reg0 asm("0") = 0x810000UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
- "6: jz 4f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2: la %4,4095(%1)\n"/* %4 = ptr + 4095 */
- " nr %4,%3\n" /* %4 = (ptr + 4095) & -4096 */
- " "SLR" %4,%1\n"
- " "CLR" %0,%4\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
- "7:"SLR" %0,%4\n"
- " j 5f\n"
- "4:"SLR" %0,%0\n"
- "5: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- if (static_key_true(&have_mvcos))
- return copy_to_user_mvcos(to, from, n);
- return copy_to_user_pt(to, from, n);
-}
-EXPORT_SYMBOL(__copy_to_user);
-
-static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
- unsigned long size)
-{
- register unsigned long reg0 asm("0") = 0x810081UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- /* FIXME: copy with reduced length. */
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
- " jz 2f\n"
- "1:"ALR" %0,%3\n"
- " "SLR" %1,%3\n"
- " "SLR" %2,%3\n"
- " j 0b\n"
- "2:"SLR" %0,%0\n"
- "3: \n"
- EX_TABLE(0b,3b)
- : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return size;
-}
-
-unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
-{
- if (static_key_true(&have_mvcos))
- return copy_in_user_mvcos(to, from, n);
- return copy_in_user_pt(to, from, n);
-}
-EXPORT_SYMBOL(__copy_in_user);
-
-static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
-{
- register unsigned long reg0 asm("0") = 0x810000UL;
- unsigned long tmp1, tmp2;
-
- tmp1 = -4096UL;
- asm volatile(
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
- " jz 4f\n"
- "1:"ALR" %0,%2\n"
- " "SLR" %1,%2\n"
- " j 0b\n"
- "2: la %3,4095(%1)\n"/* %4 = to + 4095 */
- " nr %3,%2\n" /* %4 = (to + 4095) & -4096 */
- " "SLR" %3,%1\n"
- " "CLR" %0,%3\n" /* copy crosses next page boundary? */
- " jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
- " "SLR" %0,%3\n"
- " j 5f\n"
- "4:"SLR" %0,%0\n"
- "5: \n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
- : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
- : "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
- return size;
-}
-
-unsigned long __clear_user(void __user *to, unsigned long size)
-{
- if (static_key_true(&have_mvcos))
- return clear_user_mvcos(to, size);
- return clear_user_pt(to, size);
-}
-EXPORT_SYMBOL(__clear_user);
-
-static inline unsigned long strnlen_user_mvcos(const char __user *src,
- unsigned long count)
-{
- unsigned long done, len, offset, len_str;
- char buf[256];
-
- done = 0;
- do {
- offset = (unsigned long)src & ~PAGE_MASK;
- len = min(256UL, PAGE_SIZE - offset);
- len = min(count - done, len);
- if (copy_from_user_mvcos(buf, src, len))
- return 0;
- len_str = strnlen(buf, len);
- done += len_str;
- src += len_str;
- } while ((len_str == len) && (done < count));
- return done + 1;
-}
-
-unsigned long __strnlen_user(const char __user *src, unsigned long count)
-{
- if (static_key_true(&have_mvcos))
- return strnlen_user_mvcos(src, count);
- return strnlen_user_pt(src, count);
-}
-EXPORT_SYMBOL(__strnlen_user);
-
-static inline long strncpy_from_user_mvcos(char *dst, const char __user *src,
- long count)
-{
- unsigned long done, len, offset, len_str;
-
- if (unlikely(count <= 0))
- return 0;
- done = 0;
- do {
- offset = (unsigned long)src & ~PAGE_MASK;
- len = min(count - done, PAGE_SIZE - offset);
- if (copy_from_user_mvcos(dst, src, len))
- return -EFAULT;
- len_str = strnlen(dst, len);
- done += len_str;
- src += len_str;
- dst += len_str;
- } while ((len_str == len) && (done < count));
- return done;
-}
-
-long __strncpy_from_user(char *dst, const char __user *src, long count)
-{
- if (static_key_true(&have_mvcos))
- return strncpy_from_user_mvcos(dst, src, count);
- return strncpy_from_user_pt(dst, src, count);
-}
-EXPORT_SYMBOL(__strncpy_from_user);
-
-/*
- * The uaccess page tabe walk variant can be enforced with the "uaccesspt"
- * kernel parameter. This is mainly for debugging purposes.
- */
-static int force_uaccess_pt __initdata;
-
-static int __init parse_uaccess_pt(char *__unused)
-{
- force_uaccess_pt = 1;
- return 0;
-}
-early_param("uaccesspt", parse_uaccess_pt);
-
-static int __init uaccess_init(void)
-{
- if (IS_ENABLED(CONFIG_32BIT) || force_uaccess_pt || !test_facility(27))
- static_key_slow_dec(&have_mvcos);
- return 0;
-}
-early_initcall(uaccess_init);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
deleted file mode 100644
index 8d39760bae6..00000000000
--- a/arch/s390/lib/uaccess_pt.c
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- * User access functions based on page table walks for enhanced
- * system layout without hardware support.
- *
- * Copyright IBM Corp. 2006, 2012
- * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
- */
-
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <asm/uaccess.h>
-#include <asm/futex.h>
-#include "uaccess.h"
-
-#ifndef CONFIG_64BIT
-#define AHI "ahi"
-#define SLR "slr"
-#else
-#define AHI "aghi"
-#define SLR "slgr"
-#endif
-
-static unsigned long strnlen_kernel(const char __user *src, unsigned long count)
-{
- register unsigned long reg0 asm("0") = 0UL;
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " la %2,0(%1)\n"
- " la %3,0(%0,%1)\n"
- " "SLR" %0,%0\n"
- "0: srst %3,%2\n"
- " jo 0b\n"
- " la %0,1(%3)\n" /* strnlen_kernel results includes \0 */
- " "SLR" %0,%1\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "+a" (count), "+a" (src), "=a" (tmp1), "=a" (tmp2)
- : "d" (reg0) : "cc", "memory");
- return count;
-}
-
-static unsigned long copy_in_kernel(void __user *to, const void __user *from,
- unsigned long count)
-{
- unsigned long tmp1;
-
- asm volatile(
- " "AHI" %0,-1\n"
- " jo 5f\n"
- " bras %3,3f\n"
- "0:"AHI" %0,257\n"
- "1: mvc 0(1,%1),0(%2)\n"
- " la %1,1(%1)\n"
- " la %2,1(%2)\n"
- " "AHI" %0,-1\n"
- " jnz 1b\n"
- " j 5f\n"
- "2: mvc 0(256,%1),0(%2)\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "3:"AHI" %0,-256\n"
- " jnm 2b\n"
- "4: ex %0,1b-0b(%3)\n"
- "5:"SLR" %0,%0\n"
- "6:\n"
- EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
- : "+a" (count), "+a" (to), "+a" (from), "=a" (tmp1)
- : : "cc", "memory");
- return count;
-}
-
-/*
- * Returns kernel address for user virtual address. If the returned address is
- * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the
- * address contains the (negative) exception code.
- */
-#ifdef CONFIG_64BIT
-
-static unsigned long follow_table(struct mm_struct *mm,
- unsigned long address, int write)
-{
- unsigned long *table = (unsigned long *)__pa(mm->pgd);
-
- if (unlikely(address > mm->context.asce_limit - 1))
- return -0x38UL;
- switch (mm->context.asce_bits & _ASCE_TYPE_MASK) {
- case _ASCE_TYPE_REGION1:
- table = table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return -0x39UL;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
- case _ASCE_TYPE_REGION2:
- table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return -0x3aUL;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
- case _ASCE_TYPE_REGION3:
- table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return -0x3bUL;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
- case _ASCE_TYPE_SEGMENT:
- table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
- return -0x10UL;
- if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) {
- if (write && (*table & _SEGMENT_ENTRY_PROTECT))
- return -0x04UL;
- return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) +
- (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE);
- }
- table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
- }
- table = table + ((address >> 12) & 0xff);
- if (unlikely(*table & _PAGE_INVALID))
- return -0x11UL;
- if (write && (*table & _PAGE_PROTECT))
- return -0x04UL;
- return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
-}
-
-#else /* CONFIG_64BIT */
-
-static unsigned long follow_table(struct mm_struct *mm,
- unsigned long address, int write)
-{
- unsigned long *table = (unsigned long *)__pa(mm->pgd);
-
- table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
- return -0x10UL;
- table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
- table = table + ((address >> 12) & 0xff);
- if (unlikely(*table & _PAGE_INVALID))
- return -0x11UL;
- if (write && (*table & _PAGE_PROTECT))
- return -0x04UL;
- return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
-}
-
-#endif /* CONFIG_64BIT */
-
-static inline unsigned long __user_copy_pt(unsigned long uaddr, void *kptr,
- unsigned long n, int write_user)
-{
- struct mm_struct *mm = current->mm;
- unsigned long offset, done, size, kaddr;
- void *from, *to;
-
- if (!mm)
- return n;
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- kaddr = follow_table(mm, uaddr, write_user);
- if (IS_ERR_VALUE(kaddr))
- goto fault;
-
- offset = uaddr & ~PAGE_MASK;
- size = min(n - done, PAGE_SIZE - offset);
- if (write_user) {
- to = (void *) kaddr;
- from = kptr + done;
- } else {
- from = (void *) kaddr;
- to = kptr + done;
- }
- memcpy(to, from, size);
- done += size;
- uaddr += size;
- } while (done < n);
- spin_unlock(&mm->page_table_lock);
- return n - done;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, -kaddr, write_user))
- return n - done;
- goto retry;
-}
-
-/*
- * Do DAT for user address by page table walk, return kernel address.
- * This function needs to be called with current->mm->page_table_lock held.
- */
-static inline unsigned long __dat_user_addr(unsigned long uaddr, int write)
-{
- struct mm_struct *mm = current->mm;
- unsigned long kaddr;
- int rc;
-
-retry:
- kaddr = follow_table(mm, uaddr, write);
- if (IS_ERR_VALUE(kaddr))
- goto fault;
-
- return kaddr;
-fault:
- spin_unlock(&mm->page_table_lock);
- rc = __handle_fault(uaddr, -kaddr, write);
- spin_lock(&mm->page_table_lock);
- if (!rc)
- goto retry;
- return 0;
-}
-
-unsigned long copy_from_user_pt(void *to, const void __user *from, unsigned long n)
-{
- unsigned long rc;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return copy_in_kernel((void __user *) to, from, n);
- rc = __user_copy_pt((unsigned long) from, to, n, 0);
- if (unlikely(rc))
- memset(to + n - rc, 0, rc);
- return rc;
-}
-
-unsigned long copy_to_user_pt(void __user *to, const void *from, unsigned long n)
-{
- if (segment_eq(get_fs(), KERNEL_DS))
- return copy_in_kernel(to, (void __user *) from, n);
- return __user_copy_pt((unsigned long) to, (void *) from, n, 1);
-}
-
-unsigned long clear_user_pt(void __user *to, unsigned long n)
-{
- void *zpage = (void *) empty_zero_page;
- unsigned long done, size, ret;
-
- done = 0;
- do {
- if (n - done > PAGE_SIZE)
- size = PAGE_SIZE;
- else
- size = n - done;
- if (segment_eq(get_fs(), KERNEL_DS))
- ret = copy_in_kernel(to, (void __user *) zpage, n);
- else
- ret = __user_copy_pt((unsigned long) to, zpage, size, 1);
- done += size;
- to += size;
- if (ret)
- return ret + n - done;
- } while (done < n);
- return 0;
-}
-
-unsigned long strnlen_user_pt(const char __user *src, unsigned long count)
-{
- unsigned long uaddr = (unsigned long) src;
- struct mm_struct *mm = current->mm;
- unsigned long offset, done, len, kaddr;
- unsigned long len_str;
-
- if (unlikely(!count))
- return 0;
- if (segment_eq(get_fs(), KERNEL_DS))
- return strnlen_kernel(src, count);
- if (!mm)
- return 0;
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- kaddr = follow_table(mm, uaddr, 0);
- if (IS_ERR_VALUE(kaddr))
- goto fault;
-
- offset = uaddr & ~PAGE_MASK;
- len = min(count - done, PAGE_SIZE - offset);
- len_str = strnlen((char *) kaddr, len);
- done += len_str;
- uaddr += len_str;
- } while ((len_str == len) && (done < count));
- spin_unlock(&mm->page_table_lock);
- return done + 1;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, -kaddr, 0))
- return 0;
- goto retry;
-}
-
-long strncpy_from_user_pt(char *dst, const char __user *src, long count)
-{
- unsigned long done, len, offset, len_str;
-
- if (unlikely(count <= 0))
- return 0;
- done = 0;
- do {
- offset = (unsigned long)src & ~PAGE_MASK;
- len = min(count - done, PAGE_SIZE - offset);
- if (segment_eq(get_fs(), KERNEL_DS)) {
- if (copy_in_kernel((void __user *) dst, src, len))
- return -EFAULT;
- } else {
- if (__user_copy_pt((unsigned long) src, dst, len, 0))
- return -EFAULT;
- }
- len_str = strnlen(dst, len);
- done += len_str;
- src += len_str;
- dst += len_str;
- } while ((len_str == len) && (done < count));
- return done;
-}
-
-unsigned long copy_in_user_pt(void __user *to, const void __user *from,
- unsigned long n)
-{
- struct mm_struct *mm = current->mm;
- unsigned long offset_max, uaddr, done, size, error_code;
- unsigned long uaddr_from = (unsigned long) from;
- unsigned long uaddr_to = (unsigned long) to;
- unsigned long kaddr_to, kaddr_from;
- int write_user;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return copy_in_kernel(to, from, n);
- if (!mm)
- return n;
- done = 0;
-retry:
- spin_lock(&mm->page_table_lock);
- do {
- write_user = 0;
- uaddr = uaddr_from;
- kaddr_from = follow_table(mm, uaddr_from, 0);
- error_code = kaddr_from;
- if (IS_ERR_VALUE(error_code))
- goto fault;
-
- write_user = 1;
- uaddr = uaddr_to;
- kaddr_to = follow_table(mm, uaddr_to, 1);
- error_code = (unsigned long) kaddr_to;
- if (IS_ERR_VALUE(error_code))
- goto fault;
-
- offset_max = max(uaddr_from & ~PAGE_MASK,
- uaddr_to & ~PAGE_MASK);
- size = min(n - done, PAGE_SIZE - offset_max);
-
- memcpy((void *) kaddr_to, (void *) kaddr_from, size);
- done += size;
- uaddr_from += size;
- uaddr_to += size;
- } while (done < n);
- spin_unlock(&mm->page_table_lock);
- return n - done;
-fault:
- spin_unlock(&mm->page_table_lock);
- if (__handle_fault(uaddr, -error_code, write_user))
- return n - done;
- goto retry;
-}
-
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile("0: l %1,0(%6)\n" \
- "1: " insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4:\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc" );
-
-static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
-{
- int oldval = 0, newval, ret;
-
- switch (op) {
- case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
- break;
- default:
- ret = -ENOSYS;
- }
- if (ret == 0)
- *old = oldval;
- return ret;
-}
-
-int __futex_atomic_op_inuser(int op, u32 __user *uaddr, int oparg, int *old)
-{
- int ret;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_op_pt(op, uaddr, oparg, old);
- if (unlikely(!current->mm))
- return -EFAULT;
- spin_lock(&current->mm->page_table_lock);
- uaddr = (u32 __force __user *)
- __dat_user_addr((__force unsigned long) uaddr, 1);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_op_pt(op, uaddr, oparg, old);
- put_page(virt_to_page(uaddr));
- return ret;
-}
-
-static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
-{
- int ret;
-
- asm volatile("0: cs %1,%4,0(%5)\n"
- "1: la %0,0\n"
- "2:\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
- : "cc", "memory" );
- *uval = oldval;
- return ret;
-}
-
-int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
-{
- int ret;
-
- if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
- if (unlikely(!current->mm))
- return -EFAULT;
- spin_lock(&current->mm->page_table_lock);
- uaddr = (u32 __force __user *)
- __dat_user_addr((__force unsigned long) uaddr, 1);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
- put_page(virt_to_page(uaddr));
- return ret;
-}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index d95265b2719..19f623f1f21 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -50,6 +50,7 @@
#define VM_FAULT_BADMAP 0x020000
#define VM_FAULT_BADACCESS 0x040000
#define VM_FAULT_SIGNAL 0x080000
+#define VM_FAULT_PFAULT 0x100000
static unsigned long store_indication __read_mostly;
@@ -105,21 +106,24 @@ void bust_spinlocks(int yes)
* Returns the address space associated with the fault.
* Returns 0 for kernel space and 1 for user space.
*/
-static inline int user_space_fault(unsigned long trans_exc_code)
+static inline int user_space_fault(struct pt_regs *regs)
{
+ unsigned long trans_exc_code;
+
/*
* The lowest two bits of the translation exception
* identification indicate which paging table was used.
*/
- trans_exc_code &= 3;
- if (trans_exc_code == 2)
- /* Access via secondary space, set_fs setting decides */
+ trans_exc_code = regs->int_parm_long & 3;
+ if (trans_exc_code == 3) /* home space -> kernel */
+ return 0;
+ if (user_mode(regs))
+ return 1;
+ if (trans_exc_code == 2) /* secondary space -> set_fs */
return current->thread.mm_segment.ar4;
- /*
- * Access via primary space or access register is from user space
- * and access via home space is from the kernel.
- */
- return trans_exc_code != 3;
+ if (current->flags & PF_VCPU)
+ return 1;
+ return 0;
}
static inline void report_user_fault(struct pt_regs *regs, long signr)
@@ -171,7 +175,7 @@ static noinline void do_no_context(struct pt_regs *regs)
* terminate things with extreme prejudice.
*/
address = regs->int_parm_long & __FAIL_ADDR_MASK;
- if (!user_space_fault(regs->int_parm_long))
+ if (!user_space_fault(regs))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" at virtual kernel address %p\n", (void *)address);
else
@@ -227,6 +231,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
return;
}
case VM_FAULT_BADCONTEXT:
+ case VM_FAULT_PFAULT:
do_no_context(regs);
break;
case VM_FAULT_SIGNAL:
@@ -264,6 +269,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
*/
static inline int do_exception(struct pt_regs *regs, int access)
{
+#ifdef CONFIG_PGSTE
+ struct gmap *gmap;
+#endif
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
@@ -291,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
* user context.
*/
fault = VM_FAULT_BADCONTEXT;
- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
goto out;
address = trans_exc_code & __FAIL_ADDR_MASK;
@@ -304,9 +312,10 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
- if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
- address = __gmap_fault(address,
- (struct gmap *) S390_lowcore.gmap);
+ gmap = (struct gmap *)
+ ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+ if (gmap) {
+ address = __gmap_fault(address, gmap);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
goto out_up;
@@ -315,6 +324,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
fault = VM_FAULT_OOM;
goto out_up;
}
+ if (gmap->pfault_enabled)
+ flags |= FAULT_FLAG_RETRY_NOWAIT;
}
#endif
@@ -371,9 +382,19 @@ retry:
regs, address);
}
if (fault & VM_FAULT_RETRY) {
+#ifdef CONFIG_PGSTE
+ if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /* FAULT_FLAG_RETRY_NOWAIT has been set,
+ * mmap_sem has not been released */
+ current->thread.gmap_pfault = 1;
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
+ }
+#endif
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+ FAULT_FLAG_RETRY_NOWAIT);
flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem);
goto retry;
@@ -423,30 +444,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs)
do_fault_error(regs, fault);
}
-int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
-{
- struct pt_regs regs;
- int access, fault;
-
- /* Emulate a uaccess fault from kernel mode. */
- regs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK;
- if (!irqs_disabled())
- regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
- regs.psw.addr = (unsigned long) __builtin_return_address(0);
- regs.psw.addr |= PSW_ADDR_AMODE;
- regs.int_code = pgm_int_code;
- regs.int_parm_long = (uaddr & PAGE_MASK) | 2;
- access = write ? VM_WRITE : VM_READ;
- fault = do_exception(&regs, access);
- /*
- * Since the fault happened in kernel mode while performing a uaccess
- * all we need to do now is emulating a fixup in case "fault" is not
- * zero.
- * For the calling uaccess functions this results always in -EFAULT.
- */
- return fault ? -EFAULT : 0;
-}
-
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
@@ -627,7 +624,7 @@ static int __init pfault_irq_init(void)
{
int rc;
- rc = register_external_interrupt(0x2603, pfault_interrupt);
+ rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
if (rc)
goto out_extint;
rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
@@ -638,7 +635,7 @@ static int __init pfault_irq_init(void)
return 0;
out_pfault:
- unregister_external_interrupt(0x2603, pfault_interrupt);
+ unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
out_extint:
pfault_disable = 1;
return rc;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index d261c62e40a..0727a55d87d 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -123,10 +123,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
pmd_t *pmdp = (pmd_t *) ptep;
pte_t pte = huge_ptep_get(ptep);
- if (MACHINE_HAS_IDTE)
- __pmd_idte(addr, pmdp);
- else
- __pmd_csp(pmdp);
+ pmdp_flush_direct(mm, addr, pmdp);
pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
return pte;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ad446b0c55b..0c1073ed1e8 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -124,8 +124,6 @@ void __init paging_init(void)
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
- atomic_set(&init_mm.context.attach_count, 1);
-
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
@@ -136,6 +134,11 @@ void __init paging_init(void)
void __init mem_init(void)
{
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(0, mm_cpumask(&init_mm));
+ atomic_set(&init_mm.context.attach_count, 1);
+
max_mapnr = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 796c9320c70..d7cfd57815f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -54,7 +54,7 @@ static void __crst_table_upgrade(void *arg)
struct mm_struct *mm = arg;
if (current->active_mm == mm)
- update_mm(mm, current);
+ update_user_asce(mm, 1);
__tlb_flush_local();
}
@@ -107,8 +107,10 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
pgd_t *pgd;
- if (current->active_mm == mm)
+ if (current->active_mm == mm) {
+ clear_user_asce(mm, 1);
__tlb_flush_mm(mm);
+ }
while (mm->context.asce_limit > limit) {
pgd = mm->pgd;
switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
@@ -132,7 +134,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
crst_table_free(mm, (unsigned long *) pgd);
}
if (current->active_mm == mm)
- update_mm(mm, current);
+ update_user_asce(mm, 1);
}
#endif
@@ -198,7 +200,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
static void gmap_flush_tlb(struct gmap *gmap)
{
if (MACHINE_HAS_IDTE)
- __tlb_flush_idte((unsigned long) gmap->table |
+ __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
_ASCE_TYPE_REGION1);
else
__tlb_flush_global();
@@ -217,7 +219,7 @@ void gmap_free(struct gmap *gmap)
/* Flush tlb. */
if (MACHINE_HAS_IDTE)
- __tlb_flush_idte((unsigned long) gmap->table |
+ __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
_ASCE_TYPE_REGION1);
else
__tlb_flush_global();
@@ -505,6 +507,9 @@ static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
if (!pmd_present(*pmd) &&
__pte_alloc(mm, vma, pmd, vmaddr))
return -ENOMEM;
+ /* large pmds cannot yet be handled */
+ if (pmd_large(*pmd))
+ return -EFAULT;
/* pmd now points to a valid segment table entry. */
rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
if (!rmap)
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index bcfb70b60be..72b04de1828 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -138,7 +138,6 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
}
ret = 0;
out:
- flush_tlb_kernel_range(start, end);
return ret;
}
@@ -265,7 +264,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
memset((void *)start, 0, end - start);
ret = 0;
out:
- flush_tlb_kernel_range(start, end);
return ret;
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 708d60e4006..9c36dc398f9 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -737,10 +737,10 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
/* icm %r5,3,<d(type)>(%r1) */
EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
break;
- case BPF_S_ANC_RXHASH: /* A = skb->rxhash */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
- /* l %r5,<d(rxhash)>(%r2) */
- EMIT4_DISP(0x58502000, offsetof(struct sk_buff, rxhash));
+ case BPF_S_ANC_RXHASH: /* A = skb->hash */
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ /* l %r5,<d(hash)>(%r2) */
+ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
break;
case BPF_S_ANC_VLAN_TAG:
case BPF_S_ANC_VLAN_TAG_PRESENT:
@@ -877,6 +877,7 @@ void bpf_jit_compile(struct sk_filter *fp)
if (jit.start) {
set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *) jit.start;
+ fp->jited = 1;
}
out:
kfree(addrs);
@@ -887,10 +888,12 @@ void bpf_jit_free(struct sk_filter *fp)
unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
struct bpf_binary_header *header = (void *)addr;
- if (fp->bpf_func == sk_run_filter)
+ if (!fp->jited)
goto free_filter;
+
set_memory_rw(addr, header->pages);
module_free(NULL, header);
+
free_filter:
kfree(fp);
}
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index a32c96761ea..276f2e26c76 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -1033,7 +1033,7 @@ int hwsampler_setup(void)
max_sampler_rate = cb->qsi.max_sampl_rate;
}
}
- register_external_interrupt(0x1407, hws_ext_handler);
+ register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
hws_state = HWS_DEALLOCATED;
rc = 0;
@@ -1068,7 +1068,7 @@ int hwsampler_shutdown(void)
hws_wq = NULL;
}
- unregister_external_interrupt(0x1407, hws_ext_handler);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
hws_state = HWS_INIT;
rc = 0;
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 66670ff262a..1df1d29ac81 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -686,27 +686,13 @@ int pcibios_add_device(struct pci_dev *pdev)
int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
struct zpci_dev *zdev = get_zdev(pdev);
- struct resource *res;
- u16 cmd;
- int i;
zdev->pdev = pdev;
zpci_debug_init_device(zdev);
zpci_fmb_enable_device(zdev);
zpci_map_resources(zdev);
- pci_read_config_word(pdev, PCI_COMMAND, &cmd);
- for (i = 0; i < PCI_BAR_COUNT; i++) {
- res = &pdev->resource[i];
-
- if (res->flags & IORESOURCE_IO)
- return -EINVAL;
-
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- pci_write_config_word(pdev, PCI_COMMAND, cmd);
- return 0;
+ return pci_enable_resources(pdev, mask);
}
void pcibios_disable_device(struct pci_dev *pdev)
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index cf8a12ff733..ab4a9139300 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -48,29 +48,27 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
-static void recover_callback(struct device *dev)
+static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = get_zdev(pdev);
int ret;
+ if (!device_remove_file_self(dev, attr))
+ return count;
+
pci_stop_and_remove_bus_device(pdev);
ret = zpci_disable_device(zdev);
if (ret)
- return;
+ return ret;
ret = zpci_enable_device(zdev);
if (ret)
- return;
+ return ret;
pci_rescan_bus(zdev->bus);
-}
-
-static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- int rc = device_schedule_callback(dev, recover_callback);
- return rc ? rc : count;
+ return count;
}
static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover);