diff options
Diffstat (limited to 'arch/s390')
73 files changed, 1177 insertions, 661 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2c9789da0e2..8a4cae78f03 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -98,7 +98,6 @@ config S390 select CLONE_BACKWARDS2 select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES if !SMP - select GENERIC_KERNEL_THREAD select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL_OLD select HAVE_ALIGNED_STRUCT_PAGE if SLUB @@ -119,6 +118,7 @@ config S390 select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ @@ -228,11 +228,12 @@ config MARCH_Z196 not work on older machines. config MARCH_ZEC12 - bool "IBM zEC12" + bool "IBM zBC12 and zEC12" select HAVE_MARCH_ZEC12_FEATURES if 64BIT help - Select this to enable optimizations for IBM zEC12 (2827 series). The - kernel will be slightly faster but will not work on older machines. + Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and + 2827 series). The kernel will be slightly faster but will not work on + older machines. endchoice @@ -302,7 +303,6 @@ config HOTPLUG_CPU def_bool y prompt "Support for hot-pluggable CPUs" depends on SMP - select HOTPLUG help Say Y here to be able to turn CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu/cpu#. @@ -711,6 +711,7 @@ config S390_GUEST def_bool y prompt "s390 support for virtio devices" depends on 64BIT + select TTY select VIRTUALIZATION select VIRTIO select VIRTIO_CONSOLE diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index bae0f402bf2..87a22092b68 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -212,7 +212,9 @@ appldata_timer_handler(ctl_table *ctl, int write, return 0; } if (!write) { - len = sprintf(buf, appldata_timer_active ? "1\n" : "0\n"); + strncpy(buf, appldata_timer_active ? "1\n" : "0\n", + ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) @@ -317,7 +319,8 @@ appldata_generic_handler(ctl_table *ctl, int write, return 0; } if (!write) { - len = sprintf(buf, ops->active ? "1\n" : "0\n"); + strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; if (copy_to_user(buffer, buf, len)) { diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 7ef60b52d6e..42be5374313 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -32,7 +32,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_mem_data { +struct appldata_mem_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -63,7 +63,7 @@ static struct appldata_mem_data { u64 pgmajfault; /* page faults (major only) */ // <-- New in 2.6 -} __attribute__((packed)) appldata_mem_data; +} __packed; /* @@ -118,7 +118,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_MEM_ID, .size = sizeof(struct appldata_mem_data), .callback = &appldata_get_mem_data, - .data = &appldata_mem_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -131,7 +130,17 @@ static struct appldata_ops ops = { */ static int __init appldata_mem_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -142,6 +151,7 @@ static int __init appldata_mem_init(void) static void __exit appldata_mem_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 2d224b94535..66037d2622b 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -29,7 +29,7 @@ * book: * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ -static struct appldata_net_sum_data { +struct appldata_net_sum_data { u64 timestamp; u32 sync_count_1; /* after VM collected the record data, */ u32 sync_count_2; /* sync_count_1 and sync_count_2 should be the @@ -51,7 +51,7 @@ static struct appldata_net_sum_data { u64 rx_dropped; /* no space in linux buffers */ u64 tx_dropped; /* no space available in linux */ u64 collisions; /* collisions while transmitting */ -} __attribute__((packed)) appldata_net_sum_data; +} __packed; /* @@ -121,7 +121,6 @@ static struct appldata_ops ops = { .record_nr = APPLDATA_RECORD_NET_SUM_ID, .size = sizeof(struct appldata_net_sum_data), .callback = &appldata_get_net_sum_data, - .data = &appldata_net_sum_data, .owner = THIS_MODULE, .mod_lvl = {0xF0, 0xF0}, /* EBCDIC "00" */ }; @@ -134,7 +133,17 @@ static struct appldata_ops ops = { */ static int __init appldata_net_init(void) { - return appldata_register_ops(&ops); + int ret; + + ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL); + if (!ops.data) + return -ENOMEM; + + ret = appldata_register_ops(&ops); + if (ret) + kfree(ops.data); + + return ret; } /* @@ -145,6 +154,7 @@ static int __init appldata_net_init(void) static void __exit appldata_net_exit(void) { appldata_unregister_ops(&ops); + kfree(ops.data); } diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 3ad8f61c998..866ecbe670e 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -6,9 +6,9 @@ BITS := $(if $(CONFIG_64BIT),64,31) -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ - vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \ - sizes.h head$(BITS).o +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 +targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 +targets += misc.o piggy.o sizes.h head$(BITS).o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -48,6 +48,7 @@ vmlinux.bin.all-y := $(obj)/vmlinux.bin suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZ4) := lz4 suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_XZ) := xz @@ -56,6 +57,8 @@ $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) + $(call if_changed,lz4) $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) $(call if_changed,lzma) $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index c4c6a1cf221..57cbaff1f39 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -47,6 +47,10 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/decompress_bunzip2.c" #endif +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + #ifdef CONFIG_KERNEL_LZMA #include "../../../../lib/decompress_unlzma.c" #endif diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 7fd3690b676..138893e5f73 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -651,9 +651,7 @@ static int hypfs_create_cpu_files(struct super_block *sb, } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + return PTR_RET(rc); } static void *hypfs_create_lpar_files(struct super_block *sb, @@ -702,9 +700,7 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb, return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(sb, cpu_dir, "type", buffer); - if (IS_ERR(rc)) - return PTR_ERR(rc); - return 0; + return PTR_RET(rc); } static void *hypfs_create_phys_files(struct super_block *sb, diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 9819891ed7a..4066cee0c2d 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -9,9 +9,18 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H -typedef void (*adapter_int_handler_t)(void *, void *); +struct airq_struct { + struct hlist_node list; /* Handler queueing. */ + void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ + u8 *lsi_ptr; /* Local-Summary-Indicator pointer */ + u8 lsi_mask; /* Local-Summary-Indicator mask */ + u8 isc; /* Interrupt-subclass */ + u8 flags; +}; -void *s390_register_adapter_interrupt(adapter_int_handler_t, void *, u8); -void s390_unregister_adapter_interrupt(void *, u8); +#define AIRQ_PTR_ALLOCATED 0x01 + +int register_adapter_interrupt(struct airq_struct *airq); +void unregister_adapter_interrupt(struct airq_struct *airq); #endif /* _ASM_S390_AIRQ_H */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 4d8604e311f..7d467675873 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -693,7 +693,7 @@ static inline int find_next_bit_left(const unsigned long *addr, size -= offset; p = addr + offset / BITS_PER_LONG; if (bit) { - set = __flo_word(0, *p & (~0UL << bit)); + set = __flo_word(0, *p & (~0UL >> bit)); if (set >= size) return size + offset; if (set < BITS_PER_LONG) diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 9411db653ba..3fbc67d9e19 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -50,9 +50,10 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { struct dma_map_ops *dma_ops = get_dma_ops(dev); + debug_dma_mapping_error(dev, dma_addr); if (dma_ops->mapping_error) return dma_ops->mapping_error(dev, dma_addr); - return (dma_addr == 0UL); + return dma_addr == DMA_ERROR_CODE; } static inline void *dma_alloc_coherent(struct device *dev, size_t size, @@ -71,8 +72,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, { struct dma_map_ops *dma_ops = get_dma_ops(dev); - dma_ops->free(dev, size, cpu_addr, dma_handle, NULL); debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + dma_ops->free(dev, size, cpu_addr, dma_handle, NULL); } #endif /* _ASM_S390_DMA_MAPPING_H */ diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 2ee66a65f2d..0aa6a7ed95a 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -13,6 +13,16 @@ #define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ +static inline int __test_facility(unsigned long nr, void *facilities) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) facilities + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + /* * The test_facility function uses the bit odering where the MSB is bit 0. * That makes it easier to query facility bits with the bit number as @@ -20,12 +30,7 @@ */ static inline int test_facility(unsigned long nr) { - unsigned char *ptr; - - if (nr >= MAX_FACILITY_BIT) - return 0; - ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); - return (*ptr & (0x80 >> (nr & 7))) != 0; + return __test_facility(nr, &S390_lowcore.stfle_fac_list); } /** diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index b7931faaef6..bf246dae136 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -9,11 +9,6 @@ struct dyn_arch_ftrace { }; #define MCOUNT_ADDR ((long)_mcount) -#ifdef CONFIG_64BIT -#define MCOUNT_INSN_SIZE 12 -#else -#define MCOUNT_INSN_SIZE 20 -#endif static inline unsigned long ftrace_call_adjust(unsigned long addr) { @@ -21,4 +16,11 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_64BIT +#define MCOUNT_INSN_SIZE 12 +#else +#define MCOUNT_INSN_SIZE 22 +#endif + #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 379d96e2105..cd6b9ee7b69 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -13,29 +13,8 @@ #include <asm/page.h> #include <asm/pci_io.h> -/* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial - */ -static inline unsigned long virt_to_phys(volatile void * address) -{ - unsigned long real_address; - asm volatile( - " lra %0,0(%1)\n" - " jz 0f\n" - " la %0,0\n" - "0:" - : "=a" (real_address) : "a" (address) : "cc"); - return real_address; -} -#define virt_to_phys virt_to_phys - -static inline void * phys_to_virt(unsigned long address) -{ - return (void *) address; -} - void *xlate_dev_mem_ptr(unsigned long phys); +#define xlate_dev_mem_ptr xlate_dev_mem_ptr void unxlate_dev_mem_ptr(unsigned long phys, void *addr); /* diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 16bd5d169cd..3238d4004e8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -62,13 +62,20 @@ struct sca_block { #define CPUSTAT_MCDS 0x00000100 #define CPUSTAT_SM 0x00000080 #define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 #define CPUSTAT_J 0x00000002 #define CPUSTAT_P 0x00000001 struct kvm_s390_sie_block { atomic_t cpuflags; /* 0x0000 */ __u32 prefix; /* 0x0004 */ - __u8 reserved8[32]; /* 0x0008 */ + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + __u8 reserved10[16]; /* 0x0010 */ +#define PROG_BLOCK_SIE 0x00000001 + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ __u64 cputm; /* 0x0028 */ __u64 ckc; /* 0x0030 */ __u64 epoch; /* 0x0038 */ @@ -90,7 +97,8 @@ struct kvm_s390_sie_block { __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ __u8 ecb; /* 0x0061 */ - __u8 reserved62[2]; /* 0x0062 */ + __u8 ecb2; /* 0x0062 */ + __u8 reserved63[1]; /* 0x0063 */ __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ @@ -130,6 +138,7 @@ struct kvm_vcpu_stat { u32 deliver_program_int; u32 deliver_io_int; u32 exit_wait_state; + u32 instruction_pfmf; u32 instruction_stidp; u32 instruction_spx; u32 instruction_stpx; @@ -166,7 +175,7 @@ struct kvm_s390_ext_info { }; #define PGM_OPERATION 0x01 -#define PGM_PRIVILEGED_OPERATION 0x02 +#define PGM_PRIVILEGED_OP 0x02 #define PGM_EXECUTE 0x03 #define PGM_PROTECTION 0x04 #define PGM_ADDRESSING 0x05 @@ -219,7 +228,7 @@ struct kvm_s390_local_interrupt { atomic_t active; struct kvm_s390_float_interrupt *float_int; int timer_due; /* event indicator for waitqueue below */ - wait_queue_head_t wq; + wait_queue_head_t *wq; atomic_t *cpuflags; unsigned int action_bits; }; @@ -266,4 +275,5 @@ struct kvm_arch{ }; extern int sie64a(struct kvm_s390_sie_block *, u64 *); +extern char sie_exit; #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 75ce9b065f9..5d64fb7619c 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -32,7 +32,7 @@ void storage_key_init_range(unsigned long start, unsigned long end); -static unsigned long pfmf(unsigned long function, unsigned long address) +static inline unsigned long pfmf(unsigned long function, unsigned long address) { asm volatile( " .insn rre,0xb9af0000,%[function],%[address]" @@ -44,17 +44,13 @@ static unsigned long pfmf(unsigned long function, unsigned long address) static inline void clear_page(void *page) { - if (MACHINE_HAS_PFMF) { - pfmf(0x10000, (unsigned long)page); - } else { - register unsigned long reg1 asm ("1") = 0; - register void *reg2 asm ("2") = page; - register unsigned long reg3 asm ("3") = 4096; - asm volatile( - " mvcl 2,0" - : "+d" (reg2), "+d" (reg3) : "d" (reg1) - : "memory", "cc"); - } + register unsigned long reg1 asm ("1") = 0; + register void *reg2 asm ("2") = page; + register unsigned long reg3 asm ("3") = 4096; + asm volatile( + " mvcl 2,0" + : "+d" (reg2), "+d" (reg3) : "d" (reg1) + : "memory", "cc"); } static inline void copy_page(void *to, void *from) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6c1801235db..6e577ba0e5d 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -120,7 +120,6 @@ struct zpci_dev { struct dentry *debugfs_dev; struct dentry *debugfs_perf; - struct dentry *debugfs_debug; }; struct pci_hp_callback_ops { @@ -143,7 +142,6 @@ int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); void zpci_stop_device(struct zpci_dev *); void zpci_free_device(struct zpci_dev *); -int zpci_scan_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 5f0173a3169..1141fb3e7b2 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -14,3 +14,13 @@ /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 + +#ifdef CONFIG_64BIT + +/* Perf callbacks */ +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 590c3219c63..e1408ddb94f 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -22,6 +22,9 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq); + static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4105b8221fd..75fb726de91 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -58,9 +58,6 @@ extern unsigned long zero_page_mask; #define __HAVE_COLOR_ZERO_PAGE /* TODO: s390 cannot support io_remap_pfn_range... */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #endif /* !__ASSEMBLY__ */ /* @@ -299,18 +296,16 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf0000000UL -#define RCP_FP_BIT 0x08000000UL -#define RCP_PCL_BIT 0x00800000UL -#define RCP_HR_BIT 0x00400000UL -#define RCP_HC_BIT 0x00200000UL -#define RCP_GR_BIT 0x00040000UL -#define RCP_GC_BIT 0x00020000UL -#define RCP_IN_BIT 0x00008000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x00008000UL -#define KVM_UC_BIT 0x00004000UL +#define PGSTE_ACC_BITS 0xf0000000UL +#define PGSTE_FP_BIT 0x08000000UL +#define PGSTE_PCL_BIT 0x00800000UL +#define PGSTE_HR_BIT 0x00400000UL +#define PGSTE_HC_BIT 0x00200000UL +#define PGSTE_GR_BIT 0x00040000UL +#define PGSTE_GC_BIT 0x00020000UL +#define PGSTE_UR_BIT 0x00008000UL +#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */ #else /* CONFIG_64BIT */ @@ -367,18 +362,16 @@ extern unsigned long MODULES_END; | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO) /* Page status table bits for virtualization */ -#define RCP_ACC_BITS 0xf000000000000000UL -#define RCP_FP_BIT 0x0800000000000000UL -#define RCP_PCL_BIT 0x0080000000000000UL -#define RCP_HR_BIT 0x0040000000000000UL -#define RCP_HC_BIT 0x0020000000000000UL -#define RCP_GR_BIT 0x0004000000000000UL -#define RCP_GC_BIT 0x0002000000000000UL -#define RCP_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ - -/* User dirty / referenced bit for KVM's migration feature */ -#define KVM_UR_BIT 0x0000800000000000UL -#define KVM_UC_BIT 0x0000400000000000UL +#define PGSTE_ACC_BITS 0xf000000000000000UL +#define PGSTE_FP_BIT 0x0800000000000000UL +#define PGSTE_PCL_BIT 0x0080000000000000UL +#define PGSTE_HR_BIT 0x0040000000000000UL +#define PGSTE_HC_BIT 0x0020000000000000UL +#define PGSTE_GR_BIT 0x0004000000000000UL +#define PGSTE_GC_BIT 0x0002000000000000UL +#define PGSTE_UR_BIT 0x0000800000000000UL +#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ #endif /* CONFIG_64BIT */ @@ -618,12 +611,12 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) asm( " lg %0,%2\n" "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ - " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ " csg %0,%1,%2\n" " jl 0b\n" : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) - : "Q" (ptep[PTRS_PER_PTE]) : "cc"); + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); #endif return __pgste(new); } @@ -632,21 +625,29 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE asm( - " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " nihh %1,0xff7f\n" /* clear PCL bit */ " stg %1,%0\n" : "=Q" (ptep[PTRS_PER_PTE]) - : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc"); + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); preempt_enable(); #endif } +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} + static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE unsigned long address, bits; unsigned char skey; - if (!pte_present(*ptep)) + if (pte_val(*ptep) & _PAGE_INVALID) return pgste; address = pte_val(*ptep) & PAGE_MASK; skey = page_get_storage_key(address); @@ -657,14 +658,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) else if (bits) page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ /* Get host changed & referenced bits from pgste */ - bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52; /* Transfer page changed & referenced bit to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ /* Clear relevant host bits in pgste. */ - pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); - pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); /* Copy page access key and fetch protection bit to pgste */ pgste_val(pgste) |= (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; @@ -680,20 +681,20 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) #ifdef CONFIG_PGSTE int young; - if (!pte_present(*ptep)) + if (pte_val(*ptep) & _PAGE_INVALID) return pgste; /* Get referenced bit from storage key */ young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); if (young) - pgste_val(pgste) |= RCP_GR_BIT; + pgste_val(pgste) |= PGSTE_GR_BIT; /* Get host referenced bit from pgste */ - if (pgste_val(pgste) & RCP_HR_BIT) { - pgste_val(pgste) &= ~RCP_HR_BIT; + if (pgste_val(pgste) & PGSTE_HR_BIT) { + pgste_val(pgste) &= ~PGSTE_HR_BIT; young = 1; } /* Transfer referenced bit to kvm user bits and pte */ if (young) { - pgste_val(pgste) |= KVM_UR_BIT; + pgste_val(pgste) |= PGSTE_UR_BIT; pte_val(*ptep) |= _PAGE_SWR; } #endif @@ -704,17 +705,19 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) { #ifdef CONFIG_PGSTE unsigned long address; - unsigned long okey, nkey; + unsigned long nkey; - if (!pte_present(entry)) + if (pte_val(entry) & _PAGE_INVALID) return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); address = pte_val(entry) & PAGE_MASK; - okey = nkey = page_get_storage_key(address); - nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); - /* Set page access key and fetch protection bit from pgste */ - nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; - if (okey != nkey) - page_set_storage_key(address, nkey, 0); + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. + */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + page_set_storage_key(address, nkey, 0); #endif } @@ -743,6 +746,7 @@ struct gmap { struct mm_struct *mm; unsigned long *table; unsigned long asce; + void *private; struct list_head crst_list; }; @@ -801,8 +805,8 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - if (pgste_val(pgste) & RCP_IN_BIT) { - pgste_val(pgste) &= ~RCP_IN_BIT; + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; gmap_do_ipte_notify(mm, addr, ptep); } #endif @@ -970,8 +974,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_all(ptep, pgste); - dirty = !!(pgste_val(pgste) & KVM_UC_BIT); - pgste_val(pgste) &= ~KVM_UC_BIT; + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; pgste_set_unlock(ptep, pgste); return dirty; } @@ -990,8 +994,8 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_young(ptep, pgste); - young = !!(pgste_val(pgste) & KVM_UR_BIT); - pgste_val(pgste) &= ~KVM_UR_BIT; + young = !!(pgste_val(pgste) & PGSTE_UR_BIT); + pgste_val(pgste) &= ~PGSTE_UR_BIT; pgste_set_unlock(ptep, pgste); } return young; @@ -1098,6 +1102,11 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, pte = *ptep; if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set(ptep, pgste); + } return pte; } @@ -1105,9 +1114,13 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long address, pte_t *ptep, pte_t pte) { + pgste_t pgste; + if (mm_has_pgste(mm)) { + pgste = *(pgste_t *)(ptep + PTRS_PER_PTE); + pgste_set_key(ptep, pgste, pte); pgste_set_pte(ptep, pte); - pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); + pgste_set_unlock(ptep, pgste); } else *ptep = pte; } @@ -1351,10 +1364,11 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6b499870662..b0e6435b2f0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -91,7 +91,15 @@ struct thread_struct { #endif }; -#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */ +/* Flag to disable transactions. */ +#define PER_FLAG_NO_TE 1UL +/* Flag to enable random transaction aborts. */ +#define PER_FLAG_TE_ABORT_RAND 2UL +/* Flag to specify random transaction abort mode: + * - abort each transaction at a random instruction before TEND if set. + * - abort random transactions at a random instruction if cleared. + */ +#define PER_FLAG_TE_ABORT_RAND_TEND 4UL typedef struct thread_struct thread_struct; diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 559512a455d..52b56533c57 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -24,6 +24,7 @@ struct pt_regs unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; unsigned int int_code; + unsigned int int_parm; unsigned long int_parm_long; }; diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index f3a9e0f9270..80b6f11263c 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -10,7 +10,7 @@ #include <linux/thread_info.h> extern struct task_struct *__switch_to(void *, void *); -extern void update_per_regs(struct task_struct *task); +extern void update_cr_regs(struct task_struct *task); static inline void save_fp_regs(s390_fp_regs *fpregs) { @@ -86,7 +86,7 @@ static inline void restore_access_regs(unsigned int *acrs) restore_fp_regs(&next->thread.fp_regs); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - update_per_regs(next); \ + update_cr_regs(next); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index b75d7d68668..6d6d92b4ea1 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -32,6 +32,7 @@ struct mmu_gather { struct mm_struct *mm; struct mmu_table_batch *batch; unsigned int fullmm; + unsigned long start, end; }; struct mmu_table_batch { @@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned int full_mm_flush) + unsigned long start, + unsigned long end) { tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->start = start; + tlb->end = end; + tlb->fullmm = !(start | (end+1)); tlb->batch = NULL; if (tlb->fullmm) __tlb_flush_mm(mm); diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 9ccd1905bda..6a9a9eb645f 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -35,6 +35,7 @@ header-y += siginfo.h header-y += signal.h header-y += socket.h header-y += sockios.h +header-y += sclp_ctl.h header-y += stat.h header-y += statfs.h header-y += swab.h diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index 1c6a7f85a58..65dc694725a 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -29,6 +29,16 @@ struct chsc_async_area { __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; } __attribute__ ((packed)); +struct chsc_header { + __u16 length; + __u16 code; +} __attribute__ ((packed)); + +struct chsc_sync_area { + struct chsc_header header; + __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; +} __attribute__ ((packed)); + struct chsc_response_struct { __u16 length; __u16 code; @@ -126,5 +136,8 @@ struct chsc_cpd_info { #define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list) #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area) +#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area) +#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b) #endif diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 38eca3ba40e..5812a3b2df9 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -261,6 +261,10 @@ struct dasd_snid_ioctl_data { #define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) /* Resume IO on device */ #define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) +/* Abort all I/O on a device */ +#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240) +/* Allow I/O on a device */ +#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241) /* retrieve API version number */ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 3aa9f1ec5b2..7a84619e315 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -400,6 +400,7 @@ typedef struct #define PTRACE_POKE_SYSTEM_CALL 0x5008 #define PTRACE_ENABLE_TE 0x5009 #define PTRACE_DISABLE_TE 0x5010 +#define PTRACE_TE_ABORT_RAND 0x5011 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h new file mode 100644 index 00000000000..f2818613ee4 --- /dev/null +++ b/arch/s390/include/uapi/asm/sclp_ctl.h @@ -0,0 +1,24 @@ +/* + * IOCTL interface for SCLP + * + * Copyright IBM Corp. 2012 + * + * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#ifndef _ASM_SCLP_CTL_H +#define _ASM_SCLP_CTL_H + +#include <linux/types.h> + +struct sclp_ctl_sccb { + __u32 cmdw; + __u64 sccb; +} __attribute__((packed)); + +#define SCLP_CTL_IOCTL_MAGIC 0x10 + +#define SCLP_CTL_SCCB \ + _IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb) + +#endif diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 2dacb306835..92494494692 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_SELECT_ERR_QUEUE 45 +#define SO_BUSY_POLL 46 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 7a82f9f7010..2416138ebd3 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #define ASM_OFFSETS_C #include <linux/kbuild.h> +#include <linux/kvm_host.h> #include <linux/sched.h> #include <asm/cputime.h> #include <asm/vdso.h> @@ -47,6 +48,7 @@ int main(void) DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -161,6 +163,8 @@ int main(void) DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); + DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); + DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 64b24650e4f..dd62071624b 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -173,7 +173,7 @@ error: } } -static struct cache_dir *__cpuinit cache_create_cache_dir(int cpu) +static struct cache_dir *cache_create_cache_dir(int cpu) { struct cache_dir *cache_dir; struct kobject *kobj = NULL; @@ -289,9 +289,8 @@ static struct kobj_type cache_index_type = { .default_attrs = cache_index_default_attrs, }; -static int __cpuinit cache_create_index_dir(struct cache_dir *cache_dir, - struct cache *cache, int index, - int cpu) +static int cache_create_index_dir(struct cache_dir *cache_dir, + struct cache *cache, int index, int cpu) { struct cache_index_dir *index_dir; int rc; @@ -313,7 +312,7 @@ out: return rc; } -static int __cpuinit cache_add_cpu(int cpu) +static int cache_add_cpu(int cpu) { struct cache_dir *cache_dir; struct cache *cache; @@ -335,7 +334,7 @@ static int __cpuinit cache_add_cpu(int cpu) return 0; } -static void __cpuinit cache_remove_cpu(int cpu) +static void cache_remove_cpu(int cpu) { struct cache_index_dir *index, *next; struct cache_dir *cache_dir; @@ -354,8 +353,8 @@ static void __cpuinit cache_remove_cpu(int cpu) cache_dir_cpu[cpu] = NULL; } -static int __cpuinit cache_hotplug(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int cache_hotplug(struct notifier_block *nfb, unsigned long action, + void *hcpu) { int cpu = (long)hcpu; int rc = 0; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f703d91bf72..d8f35565717 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -21,6 +21,48 @@ #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +/* + * Return physical address for virtual address + */ +static inline void *load_real_addr(void *addr) +{ + unsigned long real_addr; + + asm volatile( + " lra %0,0(%1)\n" + " jz 0f\n" + " la %0,0\n" + "0:" + : "=a" (real_addr) : "a" (addr) : "cc"); + return (void *)real_addr; +} + +/* + * Copy up to one page to vmalloc or real memory + */ +static ssize_t copy_page_real(void *buf, void *src, size_t csize) +{ + size_t size; + + if (is_vmalloc_addr(buf)) { + BUG_ON(csize >= PAGE_SIZE); + /* If buf is not page aligned, copy first part */ + size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize); + if (size) { + if (memcpy_real(load_real_addr(buf), src, size)) + return -EFAULT; + buf += size; + src += size; + } + /* Copy second part */ + size = csize - size; + return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0; + } else { + return memcpy_real(buf, src, csize); + } +} + /* * Copy one page from "oldmem" * @@ -32,6 +74,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { unsigned long src; + int rc; if (!csize) return 0; @@ -43,11 +86,11 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, src < OLDMEM_BASE + OLDMEM_SIZE) src -= OLDMEM_BASE; if (userbuf) - copy_to_user_real((void __force __user *) buf, (void *) src, - csize); + rc = copy_to_user_real((void __force __user *) buf, + (void *) src, csize); else - memcpy_real(buf, (void *) src, csize); - return csize; + rc = copy_page_real(buf, (void *) src, csize); + return (rc == 0) ? csize : rc; } /* diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 7f4a4a8c847..be87d3e05a5 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1862,6 +1862,8 @@ void print_fn_code(unsigned char *code, unsigned long len) while (len) { ptr = buffer; opsize = insn_length(*code); + if (opsize > len) + break; ptr += sprintf(ptr, "%p: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 29829747725..87acc38f73c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -74,6 +74,8 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) static void show_trace(struct task_struct *task, unsigned long *stack) { + const unsigned long frame_size = + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); register unsigned long __r15 asm ("15"); unsigned long sp; @@ -82,11 +84,13 @@ static void show_trace(struct task_struct *task, unsigned long *stack) sp = task ? task->thread.ksp : __r15; printk("Call Trace:\n"); #ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, S390_lowcore.panic_stack - 4096, - S390_lowcore.panic_stack); + sp = __show_trace(sp, + S390_lowcore.panic_stack + frame_size - 4096, + S390_lowcore.panic_stack + frame_size); #endif - sp = __show_trace(sp, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); + sp = __show_trace(sp, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); if (task) __show_trace(sp, (unsigned long) task_stack_page(task), (unsigned long) task_stack_page(task) + THREAD_SIZE); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4d5e6f8a797..be7a408be7a 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -429,11 +429,19 @@ io_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) +io_loop: l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # call do_IRQ + tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -573,10 +581,10 @@ ext_skip: stm %r0,%r7,__PT_R0(%r11) mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS TRACE_IRQS_OFF lr %r2,%r11 # pass pointer to pt_regs - l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) basr %r14,%r1 # call do_extint j io_return diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index aa0ab02e959..3ddbc26d246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -54,7 +54,7 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka, void do_notify_resume(struct pt_regs *regs); struct ext_code; -void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long); +void do_extint(struct pt_regs *regs); void do_restart(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 4c17eece707..1c039d0c24c 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -47,7 +47,6 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -81,23 +80,27 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #endif .endm - .macro HANDLE_SIE_INTERCEPT scratch,pgmcheck + .macro HANDLE_SIE_INTERCEPT scratch,reason #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) tmhh %r8,0x0001 # interrupting from user ? - jnz .+42 + jnz .+62 lgr \scratch,%r9 - slg \scratch,BASED(.Lsie_loop) - clg \scratch,BASED(.Lsie_length) - .if \pgmcheck + slg \scratch,BASED(.Lsie_critical) + clg \scratch,BASED(.Lsie_critical_length) + .if \reason==1 # Some program interrupts are suppressing (e.g. protection). # We must also check the instruction after SIE in that case. # do_protection_exception will rewind to rewind_pad - jh .+22 + jh .+42 .else - jhe .+22 + jhe .+42 .endif - lg %r9,BASED(.Lsie_loop) - LPP BASED(.Lhost_id) # set host id + lg %r14,__SF_EMPTY(%r15) # get control block pointer + LPP __SF_EMPTY+16(%r15) # set host id + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + mvi __SF_EMPTY+31(%r15),\reason # set exit reason #endif .endm @@ -450,7 +453,7 @@ ENTRY(io_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_IO_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,2 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user? jz io_skip @@ -460,10 +463,18 @@ io_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +io_loop: lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_IRQ + tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + jz io_return + tpi 0 + jz io_return + mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID + j io_loop io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -595,7 +606,7 @@ ENTRY(ext_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_EXT_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,3 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT tmhh %r8,0x0001 # interrupting from user ? jz ext_skip @@ -605,13 +616,13 @@ ext_skip: stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + lghi %r1,__LC_EXT_PARAMS2 + mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR + mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS + mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r1,4096 lgr %r2,%r11 # pass pointer to pt_regs - llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code - llgf %r4,__LC_EXT_PARAMS # get external parameter - lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter brasl %r14,do_extint j io_return @@ -643,7 +654,7 @@ ENTRY(mcck_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,system_call lmg %r8,%r9,__LC_MCK_OLD_PSW - HANDLE_SIE_INTERCEPT %r14,0 + HANDLE_SIE_INTERCEPT %r14,4 tm __LC_MCCK_CODE,0x80 # system damage? jo mcck_panic # yes -> rest of mcck code invalid lghi %r14,__LC_CPU_TIMER_SAVE_AREA @@ -937,56 +948,50 @@ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area - xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 + xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason lmg %r0,%r13,0(%r3) # load guest gprs 0-13 -# some program checks are suppressing. C code (e.g. do_protection_exception) -# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions in the sie_loop should not cause program interrupts. So -# lets use a nop (47 00 00 00) as a landing pad. -# See also HANDLE_SIE_INTERCEPT -rewind_pad: - nop 0 -sie_loop: - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - tm __TI_flags+7(%r14),_TIF_EXIT_SIE - jnz sie_exit lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz sie_gmap lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce sie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),1 # last exit... + jnz sie_done LPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) sie_done: LPP __SF_EMPTY+16(%r15) # set host id - lg %r14,__LC_THREAD_INFO # pointer thread_info struct -sie_exit: + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other +# instructions beween sie64a and sie_done should not cause program +# interrupts. So lets use a nop (47 00 00 00) as a landing pad. +# See also HANDLE_SIE_INTERCEPT +rewind_pad: + nop 0 + .globl sie_exit +sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,0 + lg %r2,__SF_EMPTY+24(%r15) # return exit reason code br %r14 sie_fault: - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - lg %r14,__LC_THREAD_INFO # pointer thread_info struct - lg %r14,__SF_EMPTY+8(%r15) # load guest register save area - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 - lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lghi %r2,-EFAULT - br %r14 + lghi %r14,-EFAULT + stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + j sie_exit .align 8 -.Lsie_loop: - .quad sie_loop -.Lsie_length: - .quad sie_done - sie_loop -.Lhost_id: - .quad 0 +.Lsie_critical: + .quad sie_gmap +.Lsie_critical_length: + .quad sie_done - sie_gmap EX_TABLE(rewind_pad,sie_fault) - EX_TABLE(sie_loop,sie_fault) + EX_TABLE(sie_exit,sie_fault) #endif .section .rodata, "a" diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 78bdf0e5dff..e3043aef87a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -16,12 +16,6 @@ #include <trace/syscall.h> #include <asm/asm-offsets.h> -#ifdef CONFIG_64BIT -#define MCOUNT_OFFSET_RET 12 -#else -#define MCOUNT_OFFSET_RET 22 -#endif - #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_disable_code(void); @@ -155,9 +149,10 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent, if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; + ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE; if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) goto out; - trace.func = (ip & PSW_ADDR_INSN) - MCOUNT_OFFSET_RET; + trace.func = ip; /* Only trace if the calling function expects to. */ if (!ftrace_graph_entry(&trace)) { current->curr_ret_stack--; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index d8a6a385d04..feb719d3c85 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -754,9 +754,9 @@ static struct bin_attribute sys_reipl_fcp_scp_data_attr = { .write = reipl_fcp_scpdata_write, }; -DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", reipl_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", reipl_block_fcp->ipl_info.fcp.bootprog); @@ -1323,9 +1323,9 @@ static struct shutdown_action __refdata reipl_action = { /* FCP dump device attributes */ -DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.wwpn); -DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n", +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", dump_block_fcp->ipl_info.fcp.lun); DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", dump_block_fcp->ipl_info.fcp.bootprog); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f7fb58903f6..54b0995514e 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -234,9 +234,9 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, - unsigned int param32, unsigned long param64) +void __irq_entry do_extint(struct pt_regs *regs) { + struct ext_code ext_code; struct pt_regs *old_regs; struct ext_int_info *p; int index; @@ -248,6 +248,7 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, clock_comparator_work(); } kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); + ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; @@ -255,7 +256,8 @@ void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code, rcu_read_lock(); list_for_each_entry_rcu(p, &ext_int_hash[index], entry) if (likely(p->code == ext_code.code)) - p->handler(ext_code, param32, param64); + p->handler(ext_code, regs->int_parm, + regs->int_parm_long); rcu_read_unlock(); irq_exit(); set_irq_regs(old_regs); @@ -311,3 +313,69 @@ void measurement_alert_subclass_unregister(void) spin_unlock(&ma_subclass_lock); } EXPORT_SYMBOL(measurement_alert_subclass_unregister); + +#ifdef CONFIG_SMP +void synchronize_irq(unsigned int irq) +{ + /* + * Not needed, the handler is protected by a lock and IRQs that occur + * after the handler is deleted are just NOPs. + */ +} +EXPORT_SYMBOL_GPL(synchronize_irq); +#endif + +#ifndef CONFIG_PCI + +/* Only PCI devices have dynamically-defined IRQ handlers */ + +int request_irq(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id) +{ + return -EINVAL; +} +EXPORT_SYMBOL_GPL(request_irq); + +void free_irq(unsigned int irq, void *dev_id) +{ + WARN_ON(1); +} +EXPORT_SYMBOL_GPL(free_irq); + +void enable_irq(unsigned int irq) +{ + WARN_ON(1); +} +EXPORT_SYMBOL_GPL(enable_irq); + +void disable_irq(unsigned int irq) +{ + WARN_ON(1); +} +EXPORT_SYMBOL_GPL(disable_irq); + +#endif /* !CONFIG_PCI */ + +void disable_irq_nosync(unsigned int irq) +{ + disable_irq(irq); +} +EXPORT_SYMBOL_GPL(disable_irq_nosync); + +unsigned long probe_irq_on(void) +{ + return 0; +} +EXPORT_SYMBOL_GPL(probe_irq_on); + +int probe_irq_off(unsigned long val) +{ + return 0; +} +EXPORT_SYMBOL_GPL(probe_irq_off); + +unsigned int probe_irq_mask(unsigned long val) +{ + return val; +} +EXPORT_SYMBOL_GPL(probe_irq_mask); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 4567ce20d90..08dcf21cb8d 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -7,6 +7,7 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/ftrace.h> .section .kprobes.text, "ax" @@ -33,6 +34,7 @@ ENTRY(ftrace_caller) la %r2,0(%r14) st %r0,__SF_BACKCHAIN(%r15) la %r3,0(%r3) + ahi %r2,-MCOUNT_INSN_SIZE l %r14,0b-0b(%r1) l %r14,0(%r14) basr %r14,%r14 diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 11332193db3..1c52eae3396 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -7,6 +7,7 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/ftrace.h> .section .kprobes.text, "ax" @@ -29,6 +30,7 @@ ENTRY(ftrace_caller) stg %r1,__SF_BACKCHAIN(%r15) lgr %r2,%r14 lg %r3,168(%r15) + aghi %r2,-MCOUNT_INSN_SIZE larl %r14,ftrace_trace_function lg %r14,0(%r14) basr %r14,%r14 diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 390d9ae57bb..fb99c2057b8 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -639,8 +639,8 @@ static struct pmu cpumf_pmu = { .cancel_txn = cpumf_pmu_cancel_txn, }; -static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, + void *hcpu) { unsigned int cpu = (long) hcpu; int flags; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index f58f37f6682..500aa1029bc 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/perf_event.h> +#include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/export.h> #include <asm/irq.h> @@ -39,6 +40,58 @@ int perf_num_counters(void) } EXPORT_SYMBOL(perf_num_counters); +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *) stack->empty1[0]; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + if (user_mode(regs)) + return false; +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + return instruction_pointer(regs) == (unsigned long) &sie_exit; +#else + return false; +#endif +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + void perf_event_print_debug(void) { struct cpumf_ctr_info cf_info; diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 753c41d0ffd..24612029f45 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -21,7 +21,7 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); /* * cpu_init - initializes state that is per-CPU. */ -void __cpuinit cpu_init(void) +void cpu_init(void) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct cpuid *id = &__get_cpu_var(cpu_id); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index a314c57f4e9..e9fadb04e3c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -47,7 +47,7 @@ enum s390_regset { REGSET_GENERAL_EXTENDED, }; -void update_per_regs(struct task_struct *task) +void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; @@ -56,17 +56,25 @@ void update_per_regs(struct task_struct *task) #ifdef CONFIG_64BIT /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr0, cr0_new; + unsigned long cr[3], cr_new[3]; - __ctl_store(cr0, 0, 0); - /* set or clear transaction execution bits 8 and 9. */ + __ctl_store(cr, 0, 2); + cr_new[1] = cr[1]; + /* Set or clear transaction execution TXC/PIFO bits 8 and 9. */ if (task->thread.per_flags & PER_FLAG_NO_TE) - cr0_new = cr0 & ~(3UL << 54); + cr_new[0] = cr[0] & ~(3UL << 54); else - cr0_new = cr0 | (3UL << 54); - /* Only load control register 0 if necessary. */ - if (cr0 != cr0_new) - __ctl_load(cr0_new, 0, 0); + cr_new[0] = cr[0] | (3UL << 54); + /* Set or clear transaction execution TDC bits 62 and 63. */ + cr_new[2] = cr[2] & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr_new[2] |= 1UL; + else + cr_new[2] |= 2UL; + } + if (memcmp(&cr_new, &cr, sizeof(cr))) + __ctl_load(cr_new, 0, 2); } #endif /* Copy user specified PER registers */ @@ -100,14 +108,14 @@ void user_enable_single_step(struct task_struct *task) { set_tsk_thread_flag(task, TIF_SINGLE_STEP); if (task == current) - update_per_regs(task); + update_cr_regs(task); } void user_disable_single_step(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_SINGLE_STEP); if (task == current) - update_per_regs(task); + update_cr_regs(task); } /* @@ -447,6 +455,26 @@ long arch_ptrace(struct task_struct *child, long request, if (!MACHINE_HAS_TE) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + return 0; + case PTRACE_TE_ABORT_RAND: + if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + return -EIO; + switch (data) { + case 0UL: + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + break; + case 1UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND; + break; + case 2UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND; + break; + default: + return -EINVAL; + } return 0; default: /* Removing high order bit from addr (only for 31 bit). */ diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 9bdbcef1da9..3bac589844a 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -7,6 +7,7 @@ EXPORT_SYMBOL(_mcount); #endif #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) EXPORT_SYMBOL(sie64a); +EXPORT_SYMBOL(sie_exit); #endif EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index b6506ee32a3..29bd7bec417 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -225,7 +225,7 @@ _sclp_print: ahi %r2,1 ltr %r0,%r0 # end of string? jz .LfinalizemtoS4 - chi %r0,0x15 # end of line (NL)? + chi %r0,0x0a # end of line (NL)? jz .LfinalizemtoS4 stc %r0,0(%r6,%r7) # copy to mto la %r11,0(%r6,%r7) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0a49095104c..aeed8a61fa0 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -719,10 +719,6 @@ static void reserve_oldmem(void) } create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); - if (OLDMEM_BASE + OLDMEM_SIZE == real_size) - saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; - else - saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } @@ -998,6 +994,7 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z196"); break; case 0x2827: + case 0x2828: strcpy(elf_platform, "zEC12"); break; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8074cb4b7cb..d386c4e9d2e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -49,7 +49,6 @@ enum { ec_schedule = 0, - ec_call_function, ec_call_function_single, ec_stop_cpu, }; @@ -166,7 +165,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) pcpu_sigp_retry(pcpu, order, 0); } -static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { struct _lowcore *lc; @@ -428,34 +427,25 @@ void smp_stop_cpu(void) * This is the main routine where commands issued by other * cpus are handled. */ -static void do_ext_call_interrupt(struct ext_code ext_code, - unsigned int param32, unsigned long param64) +static void smp_handle_ext_call(void) { unsigned long bits; - int cpu; - - cpu = smp_processor_id(); - if (ext_code.code == 0x1202) - inc_irq_stat(IRQEXT_EXC); - else - inc_irq_stat(IRQEXT_EMS); - /* - * handle bit signal external calls - */ - bits = xchg(&pcpu_devices[cpu].ec_mask, 0); + /* handle bit signal external calls */ + bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); - if (test_bit(ec_schedule, &bits)) scheduler_ipi(); - - if (test_bit(ec_call_function, &bits)) - generic_smp_call_function_interrupt(); - if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); +} +static void do_ext_call_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS); + smp_handle_ext_call(); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -463,7 +453,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function); + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) @@ -626,10 +616,9 @@ static struct sclp_cpu_info *smp_get_cpu_info(void) return info; } -static int __cpuinit smp_add_present_cpu(int cpu); +static int smp_add_present_cpu(int cpu); -static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info, - int sysfs_add) +static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) { struct pcpu *pcpu; cpumask_t avail; @@ -645,7 +634,7 @@ static int __cpuinit __smp_rescan_cpus(struct sclp_cpu_info *info, continue; pcpu = pcpu_devices + cpu; pcpu->address = info->cpu[i].address; - pcpu->state = (cpu >= info->configured) ? + pcpu->state = (i >= info->configured) ? CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); set_cpu_present(cpu, true); @@ -695,7 +684,7 @@ static void __init smp_detect_cpus(void) /* * Activate a secondary processor. */ -static void __cpuinit smp_start_secondary(void *cpuvoid) +static void smp_start_secondary(void *cpuvoid) { S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; @@ -718,7 +707,7 @@ static void __cpuinit smp_start_secondary(void *cpuvoid) } /* Upping and downing of CPUs */ -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { struct pcpu *pcpu; int rc; @@ -760,6 +749,8 @@ int __cpu_disable(void) { unsigned long cregs[16]; + /* Handle possible pending IPIs */ + smp_handle_ext_call(); set_cpu_online(smp_processor_id(), false); /* Disable pseudo page faults on this cpu. */ pfault_fini(); @@ -972,8 +963,8 @@ static struct attribute_group cpu_online_attr_group = { .attrs = cpu_online_attrs, }; -static int __cpuinit smp_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int smp_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; struct cpu *c = &pcpu_devices[cpu].cpu; @@ -991,7 +982,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, return notifier_from_errno(err); } -static int __cpuinit smp_add_present_cpu(int cpu) +static int smp_add_present_cpu(int cpu) { struct cpu *c = &pcpu_devices[cpu].cpu; struct device *s = &c->dev; diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 62f89d98e88..811f542b8ed 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -418,7 +418,7 @@ void s390_adjust_jiffies(void) /* * calibrate the delay loop */ -void __cpuinit calibrate_delay(void) +void calibrate_delay(void) { s390_adjust_jiffies(); /* Print the good old Bogomips line .. */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 3fb09359eda..9b9c1b78ec6 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -371,14 +371,14 @@ EXPORT_SYMBOL(del_virt_timer); /* * Start the virtual CPU timer on the current CPU. */ -void __cpuinit init_cpu_vtimer(void) +void init_cpu_vtimer(void) { /* set initial cpu timer */ set_vtimer(VTIMER_MAX_SLICE); } -static int __cpuinit s390_nohz_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int s390_nohz_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { struct s390_idle_data *idle; long cpu = (long) hcpu; diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 8fe9d65a458..40b4c6470f8 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,8 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) +KVM := ../../../virt/kvm +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1c01a991298..3074475c8ae 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -132,6 +132,9 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + trace_kvm_s390_handle_diag(vcpu, code); switch (code) { case 0x10: diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b7d1b2edeeb..5ee56e5acc2 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -22,87 +22,6 @@ #include "trace.h" #include "trace-s390.h" -static int handle_lctlg(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - int reg, rc; - - vcpu->stat.instruction_lctlg++; - - useraddr = kvm_s390_get_base_disp_rsy(vcpu); - - if (useraddr & 7) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - reg = reg1; - - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); - - do { - rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], - (u64 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - useraddr += 8; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static int handle_lctl(struct kvm_vcpu *vcpu) -{ - int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; - int reg3 = vcpu->arch.sie_block->ipa & 0x000f; - u64 useraddr; - u32 val = 0; - int reg, rc; - - vcpu->stat.instruction_lctl++; - - useraddr = kvm_s390_get_base_disp_rs(vcpu); - - if (useraddr & 3) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - - VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, - useraddr); - trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); - - reg = reg1; - do { - rc = get_guest(vcpu, val, (u32 __user *) useraddr); - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; - vcpu->arch.sie_block->gcr[reg] |= val; - useraddr += 4; - if (reg == reg3) - break; - reg = (reg + 1) % 16; - } while (1); - return 0; -} - -static const intercept_handler_t eb_handlers[256] = { - [0x2f] = handle_lctlg, - [0x8a] = kvm_s390_handle_priv_eb, -}; - -static int handle_eb(struct kvm_vcpu *vcpu) -{ - intercept_handler_t handler; - - handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; - if (handler) - return handler(vcpu); - return -EOPNOTSUPP; -} static const intercept_handler_t instruction_handlers[256] = { [0x01] = kvm_s390_handle_01, @@ -110,10 +29,10 @@ static const intercept_handler_t instruction_handlers[256] = { [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, - [0xb7] = handle_lctl, + [0xb7] = kvm_s390_handle_lctl, [0xb9] = kvm_s390_handle_b9, [0xe5] = kvm_s390_handle_e5, - [0xeb] = handle_eb, + [0xeb] = kvm_s390_handle_eb, }; static int handle_noop(struct kvm_vcpu *vcpu) @@ -174,47 +93,12 @@ static int handle_stop(struct kvm_vcpu *vcpu) static int handle_validity(struct kvm_vcpu *vcpu) { - unsigned long vmaddr; int viwhy = vcpu->arch.sie_block->ipb >> 16; - int rc; vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - if (viwhy == 0x37) { - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - vmaddr = gmap_fault(vcpu->arch.sie_block->prefix + PAGE_SIZE, - vcpu->arch.gmap); - if (IS_ERR_VALUE(vmaddr)) { - rc = -EOPNOTSUPP; - goto out; - } - rc = fault_in_pages_writeable((char __user *) vmaddr, - PAGE_SIZE); - if (rc) { - /* user will receive sigsegv, exit to user */ - rc = -EOPNOTSUPP; - goto out; - } - } else - rc = -EOPNOTSUPP; - -out: - if (rc) - VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", - viwhy); - return rc; + WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); + return -EOPNOTSUPP; } static int handle_instruction(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c948177529..7f35cb33e51 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -438,7 +438,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) no_timer: spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); - add_wait_queue(&vcpu->arch.local_int.wq, &wait); + add_wait_queue(&vcpu->wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && list_empty(&vcpu->arch.local_int.float_int->list) && (!vcpu->arch.local_int.timer_due) && @@ -452,7 +452,7 @@ no_timer: } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->arch.local_int.wq, &wait); + remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); @@ -465,8 +465,8 @@ void kvm_s390_tasklet(unsigned long parm) spin_lock(&vcpu->arch.local_int.lock); vcpu->arch.local_int.timer_due = 1; - if (waitqueue_active(&vcpu->arch.local_int.wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock(&vcpu->arch.local_int.lock); } @@ -613,7 +613,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) spin_lock_bh(&li->lock); list_add(&inti->list, &li->list); atomic_set(&li->active, 1); - BUG_ON(waitqueue_active(&li->wq)); + BUG_ON(waitqueue_active(li->wq)); spin_unlock_bh(&li->lock); return 0; } @@ -746,8 +746,8 @@ int kvm_s390_inject_vm(struct kvm *kvm, li = fi->local_int[sigcpu]; spin_lock_bh(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -832,8 +832,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, if (inti->type == KVM_S390_SIGP_STOP) li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&vcpu->arch.local_int.wq); + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); spin_unlock_bh(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c1c7c683fa2..34c1c9a90be 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -59,6 +59,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, { "instruction_stidp", VCPU_STAT(instruction_stidp) }, { "instruction_spx", VCPU_STAT(instruction_spx) }, { "instruction_stpx", VCPU_STAT(instruction_stpx) }, @@ -84,6 +85,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { }; static unsigned long long *facilities; +static struct gmap_notifier gmap_notifier; /* Section: not file related */ int kvm_arch_hardware_enable(void *garbage) @@ -96,13 +98,18 @@ void kvm_arch_hardware_disable(void *garbage) { } +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); + int kvm_arch_hardware_setup(void) { + gmap_notifier.notifier_call = kvm_gmap_notifier; + gmap_register_ipte_notifier(&gmap_notifier); return 0; } void kvm_arch_hardware_unsetup(void) { + gmap_unregister_ipte_notifier(&gmap_notifier); } void kvm_arch_check_processor_compat(void *rtn) @@ -239,6 +246,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.gmap = gmap_alloc(current->mm); if (!kvm->arch.gmap) goto out_nogmap; + kvm->arch.gmap->private = kvm; } kvm->arch.css_support = 0; @@ -270,7 +278,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); } static void kvm_free_vcpus(struct kvm *kvm) @@ -309,6 +317,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.gmap = gmap_alloc(current->mm); if (!vcpu->arch.gmap) return -ENOMEM; + vcpu->arch.gmap->private = vcpu->kvm; return 0; } @@ -373,8 +382,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM | - CPUSTAT_STOPPED); + CPUSTAT_STOPPED | + CPUSTAT_GED); vcpu->arch.sie_block->ecb = 6; + vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) facilities; hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); @@ -397,7 +408,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = -ENOMEM; - vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) goto out; @@ -427,7 +438,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.float_int = &kvm->arch.float_int; spin_lock(&kvm->arch.float_int.lock); kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; - init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; spin_unlock(&kvm->arch.float_int.lock); @@ -442,7 +453,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: - kfree(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); out: return ERR_PTR(rc); } @@ -454,6 +465,50 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return 0; } +void s390_vcpu_block(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +void s390_vcpu_unblock(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); +} + +/* + * Kick a guest cpu out of SIE and wait until SIE is not running. + * If the CPU is not running (e.g. waiting as idle) the function will + * return immediately. */ +void exit_sie(struct kvm_vcpu *vcpu) +{ + atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); + while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE) + cpu_relax(); +} + +/* Kick a guest cpu out of SIE and prevent SIE-reentry */ +void exit_sie_sync(struct kvm_vcpu *vcpu) +{ + s390_vcpu_block(vcpu); + exit_sie(vcpu); +} + +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address) +{ + int i; + struct kvm *kvm = gmap->private; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + /* match against both prefix pages */ + if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) { + VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address); + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); + exit_sie_sync(vcpu); + } + } +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ @@ -606,6 +661,27 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) +{ + /* + * We use MMU_RELOAD just to re-arm the ipte notifier for the + * guest prefix page. gmap_ipte_notify will wait on the ptl lock. + * This ensures that the ipte instruction for this request has + * already finished. We might race against a second unmapper that + * wants to set the blocking bit. Lets just retry the request loop. + */ + while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { + int rc; + rc = gmap_ipte_notify(vcpu->arch.gmap, + vcpu->arch.sie_block->prefix, + PAGE_SIZE * 2); + if (rc) + return rc; + s390_vcpu_unblock(vcpu); + } + return 0; +} + static int __vcpu_run(struct kvm_vcpu *vcpu) { int rc; @@ -621,16 +697,33 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) if (!kvm_is_ucontrol(vcpu->kvm)) kvm_s390_deliver_pending_interrupts(vcpu); + rc = kvm_s390_handle_requests(vcpu); + if (rc) + return rc; + vcpu->arch.sie_block->icptcode = 0; - preempt_disable(); - kvm_guest_enter(); - preempt_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); trace_kvm_s390_sie_enter(vcpu, atomic_read(&vcpu->arch.sie_block->cpuflags)); + + /* + * As PF_VCPU will be used in fault handler, between guest_enter + * and guest_exit should be no uaccess. + */ + preempt_disable(); + kvm_guest_enter(); + preempt_enable(); rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - if (rc) { + kvm_guest_exit(); + + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); + + if (rc > 0) + rc = 0; + if (rc < 0) { if (kvm_is_ucontrol(vcpu->kvm)) { rc = SIE_INTERCEPT_UCONTROL; } else { @@ -639,10 +732,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", - vcpu->arch.sie_block->icptcode); - trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); - kvm_guest_exit(); memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); return rc; @@ -1046,7 +1135,7 @@ static int __init kvm_s390_init(void) return -ENOMEM; } memcpy(facilities, S390_lowcore.stfle_fac_list, 16); - facilities[0] &= 0xff00fff3f47c0000ULL; + facilities[0] &= 0xff82fff3f47c0000ULL; facilities[1] &= 0x001c000000000000ULL; return 0; } @@ -1059,3 +1148,12 @@ static void __exit kvm_s390_exit(void) module_init(kvm_s390_init); module_exit(kvm_s390_exit); + +/* + * Enable autoloading of the kvm module. + * Note that we add the module alias here instead of virt/kvm/kvm_main.c + * since x86 takes a different approach. + */ +#include <linux/miscdevice.h> +MODULE_ALIAS_MISCDEV(KVM_MINOR); +MODULE_ALIAS("devname:kvm"); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index efc14f68726..028ca9fd215 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -63,6 +63,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) @@ -85,6 +86,12 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) +{ + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; +} + static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; @@ -125,7 +132,8 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu); +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); @@ -133,6 +141,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +void s390_vcpu_block(struct kvm_vcpu *vcpu); +void s390_vcpu_unblock(struct kvm_vcpu *vcpu); +void exit_sie(struct kvm_vcpu *vcpu); +void exit_sie_sync(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6bbd7b5a0bb..4cdc54e63eb 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1,7 +1,7 @@ /* * handling privileged instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -16,10 +16,14 @@ #include <linux/errno.h> #include <linux/compat.h> #include <asm/asm-offsets.h> +#include <asm/facility.h> #include <asm/current.h> #include <asm/debug.h> #include <asm/ebcdic.h> #include <asm/sysinfo.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> #include "gaccess.h" @@ -34,6 +38,9 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_spx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -65,6 +72,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stpx++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ @@ -89,6 +99,9 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stap++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + useraddr = kvm_s390_get_base_disp_s(vcpu); if (useraddr & 1) @@ -105,7 +118,12 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) static int handle_skey(struct kvm_vcpu *vcpu) { vcpu->stat.instruction_storage_key++; - vcpu->arch.sie_block->gpsw.addr -= 4; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -129,9 +147,10 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * Store the two-word I/O interruption code into the * provided area. */ - put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); - put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); - put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); + if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr) + || put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2)) + || put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } else { /* * Store the three-word I/O interruption code into @@ -182,6 +201,9 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 4, "%s", "I/O instruction"); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + if (vcpu->kvm->arch.css_support) { /* * Most I/O instructions will be handled by userspace. @@ -210,8 +232,12 @@ static int handle_stfl(struct kvm_vcpu *vcpu) int rc; vcpu->stat.instruction_stfl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + /* only pass the facility bits, which we can handle */ - facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; + facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3; rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); @@ -255,8 +281,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) u64 addr; if (gpsw->mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -278,6 +304,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) psw_t new_psw; u64 addr; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -296,6 +325,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stidp++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + operand2 = kvm_s390_get_base_disp_s(vcpu); if (operand2 & 7) @@ -351,16 +383,30 @@ static int handle_stsi(struct kvm_vcpu *vcpu) vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); - operand2 = kvm_s390_get_base_disp_s(vcpu); + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (fc > 3) { + vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ + return 0; + } - if (operand2 & 0xfff && fc > 0) + if (vcpu->run->s.regs.gprs[0] & 0x0fffff00 + || vcpu->run->s.regs.gprs[1] & 0xffff0000) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - switch (fc) { - case 0: + if (fc == 0) { vcpu->run->s.regs.gprs[0] = 3 << 28; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ return 0; + } + + operand2 = kvm_s390_get_base_disp_s(vcpu); + + if (operand2 & 0xfff) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { case 1: /* same handling for 1 and 2 */ case 2: mem = get_zeroed_page(GFP_KERNEL); @@ -377,8 +423,6 @@ static int handle_stsi(struct kvm_vcpu *vcpu) goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; - default: - goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { @@ -432,20 +476,14 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) intercept_handler_t handler; /* - * a lot of B2 instructions are priviledged. We first check for - * the privileged ones, that we can handle in the kernel. If the - * kernel can handle this instruction, we check for the problem - * state bit and (a) handle the instruction or (b) send a code 2 - * program check. - * Anything else goes to userspace.*/ + * A lot of B2 instructions are priviledged. Here we check for + * the privileged ones, that we can handle in the kernel. + * Anything else goes to userspace. + */ handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } @@ -453,8 +491,7 @@ static int handle_epsw(struct kvm_vcpu *vcpu) { int reg1, reg2; - reg1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 24; - reg2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; + kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* This basically extracts the mask half of the psw. */ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; @@ -467,9 +504,87 @@ static int handle_epsw(struct kvm_vcpu *vcpu) return 0; } +#define PFMF_RESERVED 0xfffc0101UL +#define PFMF_SK 0x00020000UL +#define PFMF_CF 0x00010000UL +#define PFMF_UI 0x00008000UL +#define PFMF_FSC 0x00007000UL +#define PFMF_NQ 0x00000800UL +#define PFMF_MR 0x00000400UL +#define PFMF_MC 0x00000200UL +#define PFMF_KEY 0x000000feUL + +static int handle_pfmf(struct kvm_vcpu *vcpu) +{ + int reg1, reg2; + unsigned long start, end; + + vcpu->stat.instruction_pfmf++; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + if (!MACHINE_HAS_PFMF) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Only provide non-quiescing support if the host supports it */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* No support for conditional-SSKE */ + if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + case 0x00000000: + end = (start + (1UL << 12)) & ~((1UL << 12) - 1); + break; + case 0x00001000: + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + break; + /* We dont support EDAT2 + case 0x00002000: + end = (start + (1UL << 31)) & ~((1UL << 31) - 1); + break;*/ + default: + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + } + while (start < end) { + unsigned long useraddr; + + useraddr = gmap_translate(start, vcpu->arch.gmap); + if (IS_ERR((void *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (clear_user((void __user *)useraddr, PAGE_SIZE)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { + if (set_guest_storage_key(current->mm, useraddr, + vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } + + start += PAGE_SIZE; + } + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) + vcpu->run->s.regs.gprs[reg2] = end; + return 0; +} + static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, [0x9c] = handle_io_inst, + [0xaf] = handle_pfmf, }; int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) @@ -478,29 +593,96 @@ int kvm_s390_handle_b9(struct kvm_vcpu *vcpu) /* This is handled just as for the B2 instructions. */ handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; - if (handler) { - if ((handler != handle_epsw) && - (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); - else - return handler(vcpu); - } + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; } +int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rs(vcpu); + + if (useraddr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr); + + reg = reg1; + do { + rc = get_guest(vcpu, val, (u32 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + +static int handle_lctlg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctlg++; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + useraddr = kvm_s390_get_base_disp_rsy(vcpu); + + if (useraddr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, + useraddr); + trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); + + do { + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + + return 0; +} + static const intercept_handler_t eb_handlers[256] = { + [0x2f] = handle_lctlg, [0x8a] = handle_io_inst, }; -int kvm_s390_handle_priv_eb(struct kvm_vcpu *vcpu) +int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) { intercept_handler_t handler; - /* All eb instructions that end up here are privileged. */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff]; if (handler) return handler(vcpu); @@ -515,6 +697,9 @@ static int handle_tprot(struct kvm_vcpu *vcpu) vcpu->stat.instruction_tprot++; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); /* we only handle the Linux memory detection case: @@ -560,8 +745,7 @@ static int handle_sckpf(struct kvm_vcpu *vcpu) u32 value; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) return kvm_s390_inject_program_int(vcpu, diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1c48ab2845e..bec398c57ac 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -79,8 +79,8 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); @@ -117,8 +117,8 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); @@ -145,8 +145,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) atomic_set(&li->active, 1); atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); li->action_bits |= action; - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); out: spin_unlock_bh(&li->lock); @@ -250,8 +250,8 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); - if (waitqueue_active(&li->wq)) - wake_up_interruptible(&li->wq); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); @@ -333,8 +333,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) /* sigp in userspace can exit */ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, - PGM_PRIVILEGED_OPERATION); + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); order_code = kvm_s390_get_base_disp_rs(vcpu); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 047c3e4c59a..f00aefb66a4 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -639,8 +639,8 @@ out: put_task_struct(tsk); } -static int __cpuinit pfault_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int pfault_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { struct thread_struct *thread, *next; struct task_struct *tsk; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 89ebae4008f..ad446b0c55b 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -69,6 +69,7 @@ static void __init setup_zero_pages(void) order = 2; break; case 0x2827: /* zEC12 */ + case 0x2828: /* zEC12 */ default: order = 5; break; @@ -135,30 +136,17 @@ void __init paging_init(void) void __init mem_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - - max_mapnr = num_physpages = max_low_pfn; + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); setup_zero_pages(); /* Setup zeroed pages. */ - reservedpages = 0; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - max_mapnr << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >>10, - initsize >> 10); + mem_init_print_info(NULL); printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&_stext, PFN_ALIGN((unsigned long)&_eshared) - 1); @@ -166,13 +154,14 @@ void __init mem_init(void) void free_initmem(void) { - free_initmem_default(0); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd"); + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c index 3cbd3b8bf31..cca388253a3 100644 --- a/arch/s390/mm/mem_detect.c +++ b/arch/s390/mm/mem_detect.c @@ -123,7 +123,8 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, continue; } else if ((addr <= chunk->addr) && (addr + size >= chunk->addr + chunk->size)) { - memset(chunk, 0 , sizeof(*chunk)); + memmove(chunk, chunk + 1, (MEMORY_CHUNKS-i-1) * sizeof(*chunk)); + memset(&mem_chunk[MEMORY_CHUNKS-1], 0, sizeof(*chunk)); } else if (addr + size < chunk->addr + chunk->size) { chunk->size = chunk->addr + chunk->size - addr - size; chunk->addr = addr + size; diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 06bafec0027..40023290ee5 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } @@ -176,11 +174,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = s390_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); mm->get_unmapped_area = s390_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7805ddca833..a8154a1a2c9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -492,7 +492,7 @@ static int gmap_connect_pgtable(unsigned long address, unsigned long segment, mp = (struct gmap_pgtable *) page->index; rmap->gmap = gmap; rmap->entry = segment_ptr; - rmap->vmaddr = address; + rmap->vmaddr = address & PMD_MASK; spin_lock(&mm->page_table_lock); if (*segment_ptr == segment) { list_add(&rmap->list, &mp->mapper); @@ -677,8 +677,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) break; } /* Get the page mapped */ - if (get_user_pages(current, gmap->mm, addr, 1, 1, 0, - NULL, NULL) != 1) { + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) { rc = -EFAULT; break; } @@ -690,7 +689,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) entry = *ptep; if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= RCP_IN_BIT; + pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); start += PAGE_SIZE; len -= PAGE_SIZE; @@ -772,6 +771,54 @@ static inline void page_table_free_pgste(unsigned long *table) __free_page(page); } +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned long key, bool nq) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long address, bits; + unsigned char skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Set storage key ACC and FP */ + page_set_storage_key(address, + (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)), + !nq); + + /* Merge host changed & referenced into pgste */ + pgste_val(new) |= bits << 52; + /* Transfer skey changed & referenced bit to kvm user bits */ + pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ + } + /* changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & + (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(*ptep, ptl); + up_read(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL(set_guest_storage_key); + #else /* CONFIG_PGSTE */ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, @@ -1118,7 +1165,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) pgtable; @@ -1132,7 +1180,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 82f165f8078..d5f10a43a58 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -9,6 +9,8 @@ #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/filter.h> +#include <linux/random.h> +#include <linux/init.h> #include <asm/cacheflush.h> #include <asm/processor.h> #include <asm/facility.h> @@ -221,6 +223,37 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) EMIT2(0x07fe); } +/* Helper to find the offset of pkt_type in sk_buff + * Make sure its still a 3bit field starting at the MSBs within a byte. + */ +#define PKT_TYPE_MAX 0xe0 +static int pkt_type_offset; + +static int __init bpf_pkt_type_offset_init(void) +{ + struct sk_buff skb_probe = { + .pkt_type = ~0, + }; + char *ct = (char *)&skb_probe; + int off; + + pkt_type_offset = -1; + for (off = 0; off < sizeof(struct sk_buff); off++) { + if (!ct[off]) + continue; + if (ct[off] == PKT_TYPE_MAX) + pkt_type_offset = off; + else { + /* Found non matching bit pattern, fix needed. */ + WARN_ON_ONCE(1); + pkt_type_offset = -1; + return -1; + } + } + return 0; +} +device_initcall(bpf_pkt_type_offset_init); + /* * make sure we dont leak kernel information to user */ @@ -720,6 +753,16 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ EMIT4_DISP(0x88500000, 12); } break; + case BPF_S_ANC_PKTTYPE: + if (pkt_type_offset < 0) + goto out; + /* lhi %r5,0 */ + EMIT4(0xa7580000); + /* ic %r5,<d(pkt_type_offset)>(%r2) */ + EMIT4_DISP(0x43502000, pkt_type_offset); + /* srl %r5,5 */ + EMIT4_DISP(0x88500000, 5); + break; case BPF_S_ANC_CPU: /* A = smp_processor_id() */ #ifdef CONFIG_SMP /* l %r5,<d(cpu_nr)> */ @@ -738,8 +781,41 @@ out: return -1; } +/* + * Note: for security reasons, bpf code will follow a randomly + * sized amount of illegal instructions. + */ +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + +static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, + u8 **image_ptr) +{ + struct bpf_binary_header *header; + unsigned int sz, hole; + + /* Most BPF filters are really small, but if some of them fill a page, + * allow at least 128 extra bytes for illegal instructions. + */ + sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); + header = module_alloc(sz); + if (!header) + return NULL; + memset(header, 0, sz); + header->pages = sz / PAGE_SIZE; + hole = sz - bpfsize + sizeof(*header); + /* Insert random number of illegal instructions before BPF code + * and make sure the first instruction starts at an even address. + */ + *image_ptr = &header->image[(prandom_u32() % hole) & -2]; + return header; +} + void bpf_jit_compile(struct sk_filter *fp) { + struct bpf_binary_header *header = NULL; unsigned long size, prg_len, lit_len; struct bpf_jit jit, cjit; unsigned int *addrs; @@ -772,12 +848,11 @@ void bpf_jit_compile(struct sk_filter *fp) } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { prg_len = jit.prg - jit.start; lit_len = jit.lit - jit.mid; - size = max_t(unsigned long, prg_len + lit_len, - sizeof(struct work_struct)); + size = prg_len + lit_len; if (size >= BPF_SIZE_MAX) goto out; - jit.start = module_alloc(size); - if (!jit.start) + header = bpf_alloc_binary(size, &jit.start); + if (!header) goto out; jit.prg = jit.mid = jit.start + prg_len; jit.lit = jit.end = jit.start + prg_len + lit_len; @@ -788,37 +863,25 @@ void bpf_jit_compile(struct sk_filter *fp) cjit = jit; } if (bpf_jit_enable > 1) { - pr_err("flen=%d proglen=%lu pass=%d image=%p\n", - fp->len, jit.end - jit.start, pass, jit.start); - if (jit.start) { - printk(KERN_ERR "JIT code:\n"); + bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start); + if (jit.start) print_fn_code(jit.start, jit.mid - jit.start); - print_hex_dump(KERN_ERR, "JIT literals:\n", - DUMP_PREFIX_ADDRESS, 16, 1, - jit.mid, jit.end - jit.mid, false); - } } - if (jit.start) + if (jit.start) { + set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.start; + } out: kfree(addrs); } -static void jit_free_defer(struct work_struct *arg) -{ - module_free(NULL, arg); -} - -/* run from softirq, we must use a work_struct to call - * module_free() from process context - */ void bpf_jit_free(struct sk_filter *fp) { - struct work_struct *work; + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; if (fp->bpf_func == sk_run_filter) return; - work = (struct work_struct *)fp->bpf_func; - INIT_WORK(work, jit_free_defer); - schedule_work(work); + set_memory_rw(addr, header->pages); + module_free(NULL, header); } diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h index 1912f3bb190..0022e1ebfbd 100644 --- a/arch/s390/oprofile/hwsampler.h +++ b/arch/s390/oprofile/hwsampler.h @@ -81,8 +81,8 @@ struct hws_data_entry { unsigned int:16; unsigned int prim_asn:16; /* primary ASN */ unsigned long long ia; /* Instruction Address */ - unsigned long long lpp; /* Logical-Partition Program Param. */ - unsigned long long vpp; /* Virtual-Machine Program Param. */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ }; struct hws_trailer_entry { diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index ffeb17ce7f3..930783d2c99 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -440,7 +440,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) switch (id.machine) { case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; - case 0x2827: ops->cpu_type = "s390/zEC12"; break; + case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; default: return -ENODEV; } } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e6f15b5d8b7..e2956ad39a4 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -82,10 +82,13 @@ struct intr_bucket { static struct intr_bucket *bucket; -/* Adapter local summary indicator */ -static u8 *zpci_irq_si; +/* Adapter interrupt definitions */ +static void zpci_irq_handler(struct airq_struct *airq); -static atomic_t irq_retries = ATOMIC_INIT(0); +static struct airq_struct zpci_airq = { + .handler = zpci_irq_handler, + .isc = PCI_ISC, +}; /* I/O Map */ static DEFINE_SPINLOCK(zpci_iomap_lock); @@ -302,15 +305,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -void synchronize_irq(unsigned int irq) -{ - /* - * Not needed, the handler is protected by a lock and IRQs that occur - * after the handler is deleted are just NOPs. - */ -} -EXPORT_SYMBOL_GPL(synchronize_irq); - void enable_irq(unsigned int irq) { struct msi_desc *msi = irq_get_msi_desc(irq); @@ -327,30 +321,6 @@ void disable_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(disable_irq); -void disable_irq_nosync(unsigned int irq) -{ - disable_irq(irq); -} -EXPORT_SYMBOL_GPL(disable_irq_nosync); - -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_on); - -int probe_irq_off(unsigned long val) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_off); - -unsigned int probe_irq_mask(unsigned long val) -{ - return val; -} -EXPORT_SYMBOL_GPL(probe_irq_mask); - void pcibios_fixup_bus(struct pci_bus *bus) { } @@ -437,7 +407,7 @@ static struct pci_ops pci_root_ops = { /* store the last handled bit to implement fair scheduling of devices */ static DEFINE_PER_CPU(unsigned long, next_sbit); -static void zpci_irq_handler(void *dont, void *need) +static void zpci_irq_handler(struct airq_struct *airq) { unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); int rescan = 0, max = aisb_max; @@ -485,7 +455,6 @@ scan: max = aisb_max; sbit = find_first_bit_left(bucket->aisb, max); if (sbit != max) { - atomic_inc(&irq_retries); rescan++; goto scan; } @@ -598,7 +567,21 @@ static void zpci_map_resources(struct zpci_dev *zdev) pr_debug("BAR%i: -> start: %Lx end: %Lx\n", i, pdev->resource[i].start, pdev->resource[i].end); } -}; +} + +static void zpci_unmap_resources(struct zpci_dev *zdev) +{ + struct pci_dev *pdev = zdev->pdev; + resource_size_t len; + int i; + + for (i = 0; i < PCI_BAR_COUNT; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pci_iounmap(pdev, (void *) pdev->resource[i].start); + } +} struct zpci_dev *zpci_alloc_device(void) { @@ -734,25 +717,20 @@ static int __init zpci_irq_init(void) goto out_alloc; } - isc_register(PCI_ISC); - zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC); - if (IS_ERR(zpci_irq_si)) { - rc = PTR_ERR(zpci_irq_si); - zpci_irq_si = NULL; + rc = register_adapter_interrupt(&zpci_airq); + if (rc) goto out_ai; - } + /* Set summary to 1 to be called every time for the ISC. */ + *zpci_airq.lsi_ptr = 1; for_each_online_cpu(cpu) per_cpu(next_sbit, cpu) = 0; spin_lock_init(&bucket->lock); - /* set summary to 1 to be called every time for the ISC */ - *zpci_irq_si = 1; set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; out_ai: - isc_unregister(PCI_ISC); free_page((unsigned long) bucket->alloc); out_alloc: free_page((unsigned long) bucket->aisb); @@ -765,21 +743,10 @@ static void zpci_irq_exit(void) { free_page((unsigned long) bucket->alloc); free_page((unsigned long) bucket->aisb); - s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC); - isc_unregister(PCI_ISC); + unregister_adapter_interrupt(&zpci_airq); kfree(bucket); } -void zpci_debug_info(struct zpci_dev *zdev, struct seq_file *m) -{ - if (!zdev) - return; - - seq_printf(m, "global irq retries: %u\n", atomic_read(&irq_retries)); - seq_printf(m, "aibv[0]:%016lx aibv[1]:%016lx aisb:%016lx\n", - get_imap(0)->aibv, get_imap(1)->aibv, *bucket->aisb); -} - static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, unsigned long flags, int domain) { @@ -843,6 +810,16 @@ int pcibios_add_device(struct pci_dev *pdev) return 0; } +void pcibios_release_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + + zpci_unmap_resources(zdev); + zpci_fmb_disable_device(zdev); + zpci_debug_exit_device(zdev); + zdev->pdev = NULL; +} + static int zpci_scan_bus(struct zpci_dev *zdev) { struct resource *res; @@ -983,25 +960,6 @@ void zpci_stop_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_stop_device); -int zpci_scan_device(struct zpci_dev *zdev) -{ - zdev->pdev = pci_scan_single_device(zdev->bus, ZPCI_DEVFN); - if (!zdev->pdev) { - pr_err("pci_scan_single_device failed for fid: 0x%x\n", - zdev->fid); - goto out; - } - - pci_bus_add_devices(zdev->bus); - - return 0; -out: - zpci_dma_exit_device(zdev); - clp_disable_fh(zdev); - return -EIO; -} -EXPORT_SYMBOL_GPL(zpci_scan_device); - static inline int barsize(u8 size) { return (size) ? (1 << size) >> 10 : 0; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index bd34359d154..2e9539625d9 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -236,7 +236,6 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!zdev_enabled(zdev)) return 0; - dev_info(&zdev->pdev->dev, "disabling fn handle: 0x%x\n", fh); rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); if (!rc) /* Success -> store disabled handle in zdev */ diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 771b82359af..75c69b402e0 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -115,27 +115,6 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -static int pci_debug_show(struct seq_file *m, void *v) -{ - struct zpci_dev *zdev = m->private; - - zpci_debug_info(zdev, m); - return 0; -} - -static int pci_debug_seq_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, pci_debug_show, - file_inode(filp)->i_private); -} - -static const struct file_operations debugfs_pci_debug_fops = { - .open = pci_debug_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - void zpci_debug_init_device(struct zpci_dev *zdev) { zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), @@ -149,19 +128,11 @@ void zpci_debug_init_device(struct zpci_dev *zdev) &debugfs_pci_perf_fops); if (IS_ERR(zdev->debugfs_perf)) zdev->debugfs_perf = NULL; - - zdev->debugfs_debug = debugfs_create_file("debug", - S_IFREG | S_IRUGO | S_IWUSR, - zdev->debugfs_dev, zdev, - &debugfs_pci_debug_fops); - if (IS_ERR(zdev->debugfs_debug)) - zdev->debugfs_debug = NULL; } void zpci_debug_exit_device(struct zpci_dev *zdev) { debugfs_remove(zdev->debugfs_perf); - debugfs_remove(zdev->debugfs_debug); debugfs_remove(zdev->debugfs_dev); } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f8e69d5bc0a..a2343c1f6e0 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -263,7 +263,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long nr_pages, iommu_page_index; unsigned long pa = page_to_phys(page) + offset; int flags = ZPCI_PTE_VALID; @@ -304,7 +304,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); unsigned long iommu_page_index; int npages; @@ -323,7 +323,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); struct page *page; unsigned long pa; dma_addr_t map; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index a42cce69d0a..e99a2557f18 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -15,40 +15,36 @@ static ssize_t show_fid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fid); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fid); } static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL); static ssize_t show_fh(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%08x\n", zdev->fh); - return strlen(buf); + return sprintf(buf, "0x%08x\n", zdev->fh); } static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL); static ssize_t show_pchid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%04x\n", zdev->pchid); - return strlen(buf); + return sprintf(buf, "0x%04x\n", zdev->pchid); } static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL); static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, char *buf) { - struct zpci_dev *zdev = get_zdev(container_of(dev, struct pci_dev, dev)); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); - sprintf(buf, "0x%02x\n", zdev->pfgid); - return strlen(buf); + return sprintf(buf, "0x%02x\n", zdev->pfgid); } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); |