diff options
Diffstat (limited to 'arch/s390')
63 files changed, 1230 insertions, 633 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild new file mode 100644 index 00000000000..ae4b01060ed --- /dev/null +++ b/arch/s390/Kbuild @@ -0,0 +1,6 @@ +obj-y += kernel/ +obj-y += mm/ +obj-y += crypto/ +obj-y += appldata/ +obj-y += hypfs/ +obj-y += kvm/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f0777a47e3a..068e55d1bba 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,6 +60,9 @@ config NO_IOMEM config NO_DMA def_bool y +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT + config GENERIC_LOCKBREAK bool default y @@ -95,11 +98,13 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_GET_USER_PAGES_FAST select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -198,6 +203,13 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config SCHED_BOOK + bool "Book scheduler support" + depends on SMP + help + Book scheduler support improves the CPU scheduler's decision making + when dealing with machines that have several books. + config MATHEMU bool "IEEE FPU emulation" depends on MARCH_G5 @@ -278,6 +290,14 @@ config MARCH_Z10 machines such as the z990, z890, z900, z800, z9-109, z9-ec and z9-bc. +config MARCH_Z196 + bool "IBM zEnterprise 196" + help + Select this to enable optimizations for IBM zEnterprise 196. + The kernel will be slightly faster but will not work on older + machines such as the z990, z890, z900, z800, z9-109, z9-ec, + z9-bc, z10-ec and z10-bc. 
+ endchoice config PACK_STACK diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0c9e6c6d2a6..d5b8a6ade52 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -40,6 +40,7 @@ cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109) cflags-$(CONFIG_MARCH_Z10) += $(call cc-option,-march=z10) +cflags-$(CONFIG_MARCH_Z196) += $(call cc-option,-march=z196) #KBUILD_IMAGE is necessary for make rpm KBUILD_IMAGE :=arch/s390/boot/image @@ -94,8 +95,8 @@ head-y := arch/s390/kernel/head.o head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o) head-y += arch/s390/kernel/init_task.o -core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ +# See arch/s390/Kbuild for content of core part of the kernel +core-y += arch/s390/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 0ef9829f2ad..7ee9a1b4ad9 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -297,7 +297,7 @@ static inline int crypt_s390_func_available(int func) int ret; /* check if CPACF facility (bit 17) is available */ - if (!(stfl() & 1ULL << (31 - 17))) + if (!test_facility(17)) return 0; switch (func & CRYPT_S390_OP_MASK) { diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index aa819dac236..975e3ab13cb 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -152,6 +152,7 @@ static const struct file_operations prng_fops = { .open = &prng_open, .release = NULL, .read = &prng_read, + .llseek = noop_llseek, }; static struct miscdevice prng_dev = { diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 1211bb1d2f2..020e51c063d 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -618,6 +618,7 @@ static const struct 
file_operations dbfs_d204_ops = { .open = dbfs_d204_open, .read = dbfs_d204_read, .release = dbfs_d204_release, + .llseek = no_llseek, }; static int hypfs_dbfs_init(void) diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index ee5ab1a578e..26cf177f6a3 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -275,6 +275,7 @@ static const struct file_operations dbfs_d2fc_ops = { .open = dbfs_d2fc_open, .read = dbfs_d2fc_read, .release = dbfs_d2fc_release, + .llseek = no_llseek, }; int hypfs_vm_init(void) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 98a4a4c267a..74d98670be2 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -449,6 +449,7 @@ static const struct file_operations hypfs_file_ops = { .write = do_sync_write, .aio_read = hypfs_aio_read, .aio_write = hypfs_aio_write, + .llseek = no_llseek, }; static struct file_system_type hypfs_type = { diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 42e512ba8b4..287d7bbb6d3 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -5,6 +5,7 @@ header-y += chsc.h header-y += cmb.h header-y += dasd.h header-y += debug.h +header-y += kvm_virtio.h header-y += monwriter.h header-y += qeth.h header-y += schid.h diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index f3ba0fa98de..e8501115eca 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -92,6 +92,16 @@ struct ccw_device { }; /* + * Possible events used by the path_event notifier. + */ +#define PE_NONE 0x0 +#define PE_PATH_GONE 0x1 /* A path is no longer available. */ +#define PE_PATH_AVAILABLE 0x2 /* A path has become available and + was successfully verified. */ +#define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had + to be established again. */ + +/* * Possible CIO actions triggered by the unit check handler. 
*/ enum uc_todo { @@ -109,6 +119,7 @@ enum uc_todo { * @set_online: called when setting device online * @set_offline: called when setting device offline * @notify: notify driver of device state changes + * @path_event: notify driver of channel path events * @shutdown: called at device shutdown * @prepare: prepare for pm state transition * @complete: undo work done in @prepare @@ -127,6 +138,7 @@ struct ccw_driver { int (*set_online) (struct ccw_device *); int (*set_offline) (struct ccw_device *); int (*notify) (struct ccw_device *, int); + void (*path_event) (struct ccw_device *, int *); void (*shutdown) (struct ccw_device *); int (*prepare) (struct ccw_device *); void (*complete) (struct ccw_device *); diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index 471234b9057..e0b69540216 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -20,7 +20,7 @@ struct cpuid unsigned int ident : 24; unsigned int machine : 16; unsigned int unused : 16; -} __packed; +} __attribute__ ((packed, aligned(8))); #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 498bc389238..881d94590ae 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -12,10 +12,6 @@ #ifndef __ASM_HARDIRQ_H #define __ASM_HARDIRQ_H -#include <linux/threads.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/interrupt.h> #include <asm/lowcore.h> #define local_softirq_pending() (S390_lowcore.softirq_pending) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bb8343d157b..b56403c2df2 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -37,32 +37,6 @@ static inline int prepare_hugepage_range(struct file *file, int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -static inline pte_t pte_mkhuge(pte_t pte) -{ - /* - * PROT_NONE 
needs to be remapped from the pte type to the ste type. - * The HW invalid bit is also different for pte and ste. The pte - * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE - * bit, so we don't have to clear it. - */ - if (pte_val(pte) & _PAGE_INVALID) { - if (pte_val(pte) & _PAGE_SWT) - pte_val(pte) |= _HPAGE_TYPE_NONE; - pte_val(pte) |= _SEGMENT_ENTRY_INV; - } - /* - * Clear SW pte bits SWT and SWX, there are no SW bits in a segment - * table entry. - */ - pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); - /* - * Also set the change-override bit because we don't need dirty bit - * tracking for hugetlbfs pages. - */ - pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); - return pte; -} - static inline pte_t huge_pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RO; diff --git a/arch/s390/include/asm/ioctls.h b/arch/s390/include/asm/ioctls.h index 2f3d8736361..960a4c1ebdf 100644 --- a/arch/s390/include/asm/ioctls.h +++ b/arch/s390/include/asm/ioctls.h @@ -1,94 +1,8 @@ -/* - * include/asm-s390/ioctls.h - * - * S390 version - * - * Derived from "include/asm-i386/ioctls.h" - */ - #ifndef __ARCH_S390_IOCTLS_H__ #define __ARCH_S390_IOCTLS_H__ -#include <asm/ioctl.h> - -/* 0x54 is just a magic number to make these relatively unique ('T') */ - -#define TCGETS 0x5401 -#define TCSETS 0x5402 -#define TCSETSW 0x5403 -#define TCSETSF 0x5404 -#define TCGETA 0x5405 -#define TCSETA 0x5406 -#define TCSETAW 0x5407 -#define TCSETAF 0x5408 -#define TCSBRK 0x5409 -#define TCXONC 0x540A -#define TCFLSH 0x540B -#define TIOCEXCL 0x540C -#define TIOCNXCL 0x540D -#define TIOCSCTTY 0x540E -#define TIOCGPGRP 0x540F -#define TIOCSPGRP 0x5410 -#define TIOCOUTQ 0x5411 -#define TIOCSTI 0x5412 -#define TIOCGWINSZ 0x5413 -#define TIOCSWINSZ 0x5414 -#define TIOCMGET 0x5415 -#define TIOCMBIS 0x5416 -#define TIOCMBIC 0x5417 -#define TIOCMSET 0x5418 -#define TIOCGSOFTCAR 0x5419 -#define TIOCSSOFTCAR 0x541A -#define FIONREAD 0x541B -#define TIOCINQ FIONREAD -#define TIOCLINUX 0x541C 
-#define TIOCCONS 0x541D -#define TIOCGSERIAL 0x541E -#define TIOCSSERIAL 0x541F -#define TIOCPKT 0x5420 -#define FIONBIO 0x5421 -#define TIOCNOTTY 0x5422 -#define TIOCSETD 0x5423 -#define TIOCGETD 0x5424 -#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -#define TIOCSBRK 0x5427 /* BSD compatibility */ -#define TIOCCBRK 0x5428 /* BSD compatibility */ -#define TIOCGSID 0x5429 /* Return the session ID of FD */ -#define TCGETS2 _IOR('T',0x2A, struct termios2) -#define TCSETS2 _IOW('T',0x2B, struct termios2) -#define TCSETSW2 _IOW('T',0x2C, struct termios2) -#define TCSETSF2 _IOW('T',0x2D, struct termios2) -#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ -#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ -#define TIOCSIG _IOW('T',0x36, int) /* Generate signal on Pty slave */ - -#define FIONCLEX 0x5450 /* these numbers need to be adjusted. */ -#define FIOCLEX 0x5451 -#define FIOASYNC 0x5452 -#define TIOCSERCONFIG 0x5453 -#define TIOCSERGWILD 0x5454 -#define TIOCSERSWILD 0x5455 -#define TIOCGLCKTRMIOS 0x5456 -#define TIOCSLCKTRMIOS 0x5457 -#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ -#define TIOCSERGETLSR 0x5459 /* Get line status register */ -#define TIOCSERGETMULTI 0x545A /* Get multiport config */ -#define TIOCSERSETMULTI 0x545B /* Set multiport config */ - -#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ -#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ #define FIOQSIZE 0x545E -/* Used for packet mode */ -#define TIOCPKT_DATA 0 -#define TIOCPKT_FLUSHREAD 1 -#define TIOCPKT_FLUSHWRITE 2 -#define TIOCPKT_STOP 4 -#define TIOCPKT_START 8 -#define TIOCPKT_NOSTOP 16 -#define TIOCPKT_DOSTOP 32 -#define TIOCPKT_IOCTL 64 - -#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ +#include <asm-generic/ioctls.h> #endif diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h index 15b3ac25389..865d6d891ac 100644 --- 
a/arch/s390/include/asm/irqflags.h +++ b/arch/s390/include/asm/irqflags.h @@ -8,8 +8,8 @@ #include <linux/types.h> -/* store then or system mask. */ -#define __raw_local_irq_stosm(__or) \ +/* store then OR system mask. */ +#define __arch_local_irq_stosm(__or) \ ({ \ unsigned long __mask; \ asm volatile( \ @@ -18,8 +18,8 @@ __mask; \ }) -/* store then and system mask. */ -#define __raw_local_irq_stnsm(__and) \ +/* store then AND system mask. */ +#define __arch_local_irq_stnsm(__and) \ ({ \ unsigned long __mask; \ asm volatile( \ @@ -29,39 +29,44 @@ }) /* set system mask. */ -#define __raw_local_irq_ssm(__mask) \ -({ \ - asm volatile("ssm %0" : : "Q" (__mask) : "memory"); \ -}) +static inline void __arch_local_irq_ssm(unsigned long flags) +{ + asm volatile("ssm %0" : : "Q" (flags) : "memory"); +} -/* interrupt control.. */ -static inline unsigned long raw_local_irq_enable(void) +static inline unsigned long arch_local_save_flags(void) { - return __raw_local_irq_stosm(0x03); + return __arch_local_irq_stosm(0x00); } -static inline unsigned long raw_local_irq_disable(void) +static inline unsigned long arch_local_irq_save(void) { - return __raw_local_irq_stnsm(0xfc); + return __arch_local_irq_stnsm(0xfc); } -#define raw_local_save_flags(x) \ -do { \ - typecheck(unsigned long, x); \ - (x) = __raw_local_irq_stosm(0x00); \ -} while (0) +static inline void arch_local_irq_disable(void) +{ + arch_local_irq_save(); +} -static inline void raw_local_irq_restore(unsigned long flags) +static inline void arch_local_irq_enable(void) { - __raw_local_irq_ssm(flags); + __arch_local_irq_stosm(0x03); } -static inline int raw_irqs_disabled_flags(unsigned long flags) +static inline void arch_local_irq_restore(unsigned long flags) +{ + __arch_local_irq_ssm(flags); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) { return !(flags & (3UL << (BITS_PER_LONG - 8))); } -/* For spinlocks etc */ -#define raw_local_irq_save(x) ((x) = raw_local_irq_disable()) +static inline bool 
arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} #endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index acdfdff2661..72f614181ef 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -54,4 +54,11 @@ struct kvm_vqconfig { * This is pagesize for historical reasons. */ #define KVM_S390_VIRTIO_RING_ALIGN 4096 + +/* These values are supposed to be in ext_params on an interrupt */ +#define VIRTIO_PARAM_MASK 0xff +#define VIRTIO_PARAM_VRING_INTERRUPT 0x0 +#define VIRTIO_PARAM_CONFIG_CHANGED 0x1 +#define VIRTIO_PARAM_DEV_ADD 0x2 + #endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 0f97ef2d92a..65e172f8209 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -150,9 +150,10 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ + __u8 pad_0x0e08[0x0f00-0x0e08]; /* 0x0e08 */ - /* Align to the top 1k of prefix area */ - __u8 pad_0x0e08[0x1000-0x0e08]; /* 0x0e08 */ + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ } __packed; #else /* CONFIG_32BIT */ @@ -285,7 +286,11 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - __u8 pad_0x0e0c[0x11b8-0x0e0c]; /* 0x0e0c */ + __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ + + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ + __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index af650fb4720..a8729ea7e9a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -108,9 +108,13 @@ typedef pte_t *pgtable_t; #define __pgprot(x) ((pgprot_t) { (x) } ) static inline void -page_set_storage_key(unsigned long addr, unsigned int skey) 
+page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) { - asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); + if (!mapped) + asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" + : : "d" (skey), "a" (addr)); + else + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } static inline unsigned int diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 3840cbe7763..a75f168d271 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -4,7 +4,6 @@ * Copyright 2009 Martin Schwidefsky, IBM Corporation. */ -static inline void set_perf_event_pending(void) {} -static inline void clear_perf_event_pending(void) {} +/* Empty, just to avoid compiling error */ #define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 68940d0bad9..082eb4e50e8 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -21,9 +21,11 @@ unsigned long *crst_table_alloc(struct mm_struct *, int); void crst_table_free(struct mm_struct *, unsigned long *); +void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); +void page_table_free_rcu(struct mm_struct *, unsigned long *); void disable_noexec(struct mm_struct *, struct task_struct *); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) @@ -176,4 +178,6 @@ static inline void pmd_populate(struct mm_struct *mm, #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) +extern void rcu_table_freelist_finish(void); + #endif /* _S390_PGALLOC_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3157441ee1d..02ace3491c5 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -38,6 +38,7 @@ 
extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); extern void vmem_map_init(void); +extern void fault_init(void); /* * The S390 doesn't have any external MMU info: the kernel page @@ -46,11 +47,27 @@ extern void vmem_map_init(void); #define update_mmu_cache(vma, address, ptep) do { } while (0) /* - * ZERO_PAGE is a global shared page that is always zero: used + * ZERO_PAGE is a global shared page that is always zero; used * for zero-mapped memory areas etc.. */ -extern char empty_zero_page[PAGE_SIZE]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern unsigned long empty_zero_page; +extern unsigned long zero_page_mask; + +#define ZERO_PAGE(vaddr) \ + (virt_to_page((void *)(empty_zero_page + \ + (((unsigned long)(vaddr)) &zero_page_mask)))) + +#define is_zero_pfn is_zero_pfn +static inline int is_zero_pfn(unsigned long pfn) +{ + extern unsigned long zero_pfn; + unsigned long offset_from_zero_pfn = pfn - zero_pfn; + return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); +} + +#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) + #endif /* !__ASSEMBLY__ */ /* @@ -300,6 +317,7 @@ extern unsigned long VMALLOC_START; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ +#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ @@ -572,7 +590,7 @@ static inline void rcp_unlock(pte_t *ptep) } /* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page); +static inline void page_clear_dirty(struct page *page, int mapped); #include <linux/page-flags.h> static inline void ptep_rcp_copy(pte_t *ptep) @@ -754,6 +772,34 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte; } +#ifdef CONFIG_HUGETLB_PAGE +static inline 
pte_t pte_mkhuge(pte_t pte) +{ + /* + * PROT_NONE needs to be remapped from the pte type to the ste type. + * The HW invalid bit is also different for pte and ste. The pte + * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE + * bit, so we don't have to clear it. + */ + if (pte_val(pte) & _PAGE_INVALID) { + if (pte_val(pte) & _PAGE_SWT) + pte_val(pte) |= _HPAGE_TYPE_NONE; + pte_val(pte) |= _SEGMENT_ENTRY_INV; + } + /* + * Clear SW pte bits SWT and SWX, there are no SW bits in a segment + * table entry. + */ + pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); + /* + * Also set the change-override bit because we don't need dirty bit + * tracking for hugetlbfs pages. + */ + pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + return pte; +} +#endif + #ifdef CONFIG_PGSTE /* * Get (and clear) the user dirty bit for a PTE. @@ -782,7 +828,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, } dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); if (skey & _PAGE_CHANGED) - page_clear_dirty(page); + page_clear_dirty(page, 1); rcp_unlock(ptep); return dirty; } @@ -957,9 +1003,9 @@ static inline int page_test_dirty(struct page *page) } #define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page) +static inline void page_clear_dirty(struct page *page, int mapped) { - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY); + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); } /* @@ -1048,9 +1094,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) #define pte_offset_kernel(pmd, address) pte_offset(pmd,address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) -#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) /* * 31 bit swap entry format: diff --git 
a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 73e259834e1..8d6f8716957 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -82,8 +82,6 @@ struct thread_struct { unsigned long prot_addr; /* address of protection-excep. */ unsigned int trap_no; per_struct per_info; - /* Used to give failing instruction back to user for ieee exceptions */ - unsigned long ieee_instruction_pointer; /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; }; diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index e2c218dc68a..d9d42b1e46f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -481,8 +481,7 @@ struct user_regs_struct * watchpoints. This is the way intel does it. */ per_struct per_info; - unsigned long ieee_instruction_pointer; - /* Used to give failing instruction back to user for ieee exceptions */ + unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ }; #ifdef __KERNEL__ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 2ba63027629..46e96bc1f5a 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -360,6 +360,7 @@ struct qdio_initialize { unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; + void (*queue_start_poll) (struct ccw_device *, int, unsigned long); unsigned long int_parm; void **input_sbal_addr_array; void **output_sbal_addr_array; @@ -377,11 +378,13 @@ struct qdio_initialize { extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); - -extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags, - int q_nr, unsigned int bufnr, unsigned int count); -extern int qdio_shutdown(struct ccw_device*, int); +extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, + unsigned int); +extern 
int qdio_start_irq(struct ccw_device *, int); +extern int qdio_stop_irq(struct ccw_device *, int); +extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); +extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); -extern int qdio_get_ssqd_desc(struct ccw_device *dev, struct qdio_ssqd_desc*); +extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); #endif /* __QDIO_H__ */ diff --git a/arch/s390/include/asm/s390_ext.h b/arch/s390/include/asm/s390_ext.h index 2afc060266a..1a9307e7084 100644 --- a/arch/s390/include/asm/s390_ext.h +++ b/arch/s390/include/asm/s390_ext.h @@ -12,7 +12,7 @@ #include <linux/types.h> -typedef void (*ext_int_handler_t)(__u16 code); +typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); typedef struct ext_int_info_t { struct ext_int_info_t *next; diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h index 35d786fe93a..6d45ef6c12a 100644 --- a/arch/s390/include/asm/scatterlist.h +++ b/arch/s390/include/asm/scatterlist.h @@ -1 +1,3 @@ #include <asm-generic/scatterlist.h> + +#define ARCH_HAS_SG_CHAIN diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 25e831d58e1..d5e2ef10537 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -73,6 +73,7 @@ extern unsigned int user_mode; #define MACHINE_FLAG_PFMF (1UL << 11) #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) +#define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -90,6 +91,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_HPAGE (0) #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) +#define MACHINE_HAS_TOPOLOGY (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -100,6 +102,7 @@ extern unsigned int user_mode; 
#define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) +#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 8429686951f..5c0246b955d 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -65,8 +65,6 @@ static inline void syscall_get_arguments(struct task_struct *task, if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif - if (i + n == 6) - args[--n] = regs->args[0] & mask; while (n-- > 0) if (i + n > 0) args[n] = regs->gprs[2 + i + n] & mask; @@ -80,8 +78,6 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { BUG_ON(i + n > 6); - if (i + n == 6) - regs->args[0] = args[--n]; while (n-- > 0) if (i + n > 0) regs->gprs[2 + i + n] = args[n]; diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 22bdb2a0ee5..79d3d6e2e9c 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -14,8 +14,13 @@ #ifndef __ASM_S390_SYSINFO_H #define __ASM_S390_SYSINFO_H +#include <asm/bitsperlong.h> + struct sysinfo_1_1_1 { - char reserved_0[32]; + unsigned short :16; + unsigned char ccr; + unsigned char cai; + char reserved_0[28]; char manufacturer[16]; char type[4]; char reserved_1[12]; @@ -104,6 +109,39 @@ struct sysinfo_3_2_2 { char reserved_544[3552]; }; +#define TOPOLOGY_CPU_BITS 64 +#define TOPOLOGY_NR_MAG 6 + +struct topology_cpu { + unsigned char reserved0[4]; + unsigned char :6; + unsigned char pp:2; + unsigned char reserved1; + unsigned short origin; + unsigned long mask[TOPOLOGY_CPU_BITS / BITS_PER_LONG]; +}; + +struct topology_container { + unsigned char reserved[7]; + unsigned char id; +}; + 
+union topology_entry { + unsigned char nl; + struct topology_cpu cpu; + struct topology_container container; +}; + +struct sysinfo_15_1_x { + unsigned char reserved0[2]; + unsigned short length; + unsigned char mag[TOPOLOGY_NR_MAG]; + unsigned char reserved1; + unsigned char mnest; + unsigned char reserved2[4]; + union topology_entry tle[0]; +}; + static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) { register int r0 asm("0") = (fc << 28) | sel1; diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index cef66210c84..3ad16dbf622 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -85,19 +85,20 @@ static inline void restore_access_regs(unsigned int *acrs) asm volatile("lam 0,15,%0" : : "Q" (*acrs)); } -#define switch_to(prev,next,last) do { \ - if (prev == next) \ - break; \ - save_fp_regs(&prev->thread.fp_regs); \ - restore_fp_regs(&next->thread.fp_regs); \ - save_access_regs(&prev->thread.acrs[0]); \ - restore_access_regs(&next->thread.acrs[0]); \ - prev = __switch_to(prev,next); \ +#define switch_to(prev,next,last) do { \ + if (prev->mm) { \ + save_fp_regs(&prev->thread.fp_regs); \ + save_access_regs(&prev->thread.acrs[0]); \ + } \ + if (next->mm) { \ + restore_fp_regs(&next->thread.fp_regs); \ + restore_access_regs(&next->thread.acrs[0]); \ + } \ + prev = __switch_to(prev,next); \ } while (0) extern void account_vtime(struct task_struct *, struct task_struct *); extern void account_tick_vtime(struct task_struct *); -extern void account_system_vtime(struct task_struct *); #ifdef CONFIG_PFAULT extern void pfault_irq_init(void); @@ -399,7 +400,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, static inline void __set_psw_mask(unsigned long mask) { - __load_psw_mask(mask | (__raw_local_irq_stosm(0x00) & ~(-1UL >> 8))); + __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); } #define local_mcck_enable() __set_psw_mask(psw_kernel_bits) @@ -419,30 +420,21 @@ extern 
void smp_ctl_clear_bit(int cr, int bit); #endif /* CONFIG_SMP */ -static inline unsigned int stfl(void) -{ - asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b,0b)); - return S390_lowcore.stfl_fac_list; -} +#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ -static inline int __stfle(unsigned long long *list, int doublewords) +/* + * The test_facility function uses the bit ordering where the MSB is bit 0. + * That makes it easier to query facility bits with the bit number as + * documented in the Principles of Operation. + */ +static inline int test_facility(unsigned long nr) { - typedef struct { unsigned long long _[doublewords]; } addrtype; - register unsigned long __nr asm("0") = doublewords - 1; - - asm volatile(".insn s,0xb2b00000,%0" /* stfle */ - : "=m" (*(addrtype *) list), "+d" (__nr) : : "cc"); - return __nr + 1; -} + unsigned char *ptr; -static inline int stfle(unsigned long long *list, int doublewords) -{ - if (!(stfl() & (1UL << 24))) - return -EOPNOTSUPP; - return __stfle(list, doublewords); + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; } static inline unsigned short stap(void) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index fd1c00d08bf..f1f644f2240 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -64,10 +64,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb, if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS)) __tlb_flush_mm(tlb->mm); while (tlb->nr_ptes > 0) - pte_free(tlb->mm, tlb->array[--tlb->nr_ptes]); + page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); while (tlb->nr_pxds < TLB_NR_PTRS) - /* pgd_free frees the pointer as region or segment table */ - pgd_free(tlb->mm, tlb->array[tlb->nr_pxds++]); + crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); } static inline void tlb_finish_mmu(struct
mmu_gather *tlb, @@ -75,6 +74,8 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb, { tlb_flush_mmu(tlb, start, end); + rcu_table_freelist_finish(); + /* keep the page table cache within bounds */ check_pgt_cache(); @@ -103,7 +104,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pte_free(tlb->mm, pte); + page_table_free(tlb->mm, (unsigned long *) pte); } /* @@ -124,7 +125,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pmd_free(tlb->mm, pmd); + crst_table_free(tlb->mm, (unsigned long *) pmd); #endif } @@ -146,7 +147,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pud_free(tlb->mm, pud); + crst_table_free(tlb->mm, (unsigned long *) pud); #endif } diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 831bd033ea7..c5338834ddb 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -2,19 +2,38 @@ #define _ASM_S390_TOPOLOGY_H #include <linux/cpumask.h> - -#define mc_capable() (1) - -const struct cpumask *cpu_coregroup_mask(unsigned int cpu); +#include <asm/sysinfo.h> extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; +static inline const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + #define topology_core_id(cpu) (cpu_core_id[cpu]) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define mc_capable() (1) + +#ifdef CONFIG_SCHED_BOOK + +extern unsigned char cpu_book_id[NR_CPUS]; +extern cpumask_t cpu_book_map[NR_CPUS]; + +static inline const struct cpumask *cpu_book_mask(unsigned int cpu) +{ + return &cpu_book_map[cpu]; +} + +#define topology_book_id(cpu) (cpu_book_id[cpu]) +#define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) + +#endif 
/* CONFIG_SCHED_BOOK */ int topology_set_cpu_management(int fc); void topology_schedule_update(void); +void store_topology(struct sysinfo_15_1_x *info); #define POLARIZATION_UNKNWN (-1) #define POLARIZATION_HRZ (0) @@ -30,6 +49,8 @@ static inline void s390_init_cpu_topology(void) }; #endif +#define SD_BOOK_INIT SD_CPU_INIT + #include <asm-generic/topology.h> #endif /* _ASM_S390_TOPOLOGY_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 5232278d79a..f3c1b823c9a 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -84,6 +84,7 @@ int main(void) DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); + DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_perc_atmid)); DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id)); @@ -142,10 +143,8 @@ int main(void) DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); #ifdef CONFIG_32BIT - DEFINE(__LC_PFAULT_INTPARM, offsetof(struct _lowcore, ext_params)); DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ - DEFINE(__LC_PFAULT_INTPARM, offsetof(struct _lowcore, ext_params2)); DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h index 123dd660d7f..3141025724f 100644 --- a/arch/s390/kernel/compat_ptrace.h +++ b/arch/s390/kernel/compat_ptrace.h @@ -51,8 +51,7 @@ struct user_regs_struct32 * watchpoints. 
This is the way intel does it. */ per_struct32 per_info; - u32 ieee_instruction_pointer; - /* Used to give failing instruction back to user for ieee exceptions */ + u32 ieee_instruction_pointer; /* obsolete, always 0 */ }; struct user32 { diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 98192261491..5ad6bc078bf 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -174,6 +174,7 @@ static const struct file_operations debug_file_ops = { .write = debug_input, .open = debug_open, .release = debug_close, + .llseek = no_llseek, }; static struct dentry *debug_debugfs_root_entry; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b39b27d68b4..c83726c9fe0 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -113,7 +113,7 @@ enum { INSTR_INVALID, INSTR_E, INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, - INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, + INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, @@ -122,13 +122,14 @@ enum { INSTR_RRE_RR, INSTR_RRE_RR_OPT, INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_M0RR, INSTR_RRF_R0RR, - INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, - INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, + INSTR_RRF_R0RR2, INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, + INSTR_RRF_U0RR, INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, INSTR_RSI_RRP, INSTR_RSL_R0RD, INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, + INSTR_RSY_RDRM, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, INSTR_RS_RURD, INSTR_RXE_FRRD, INSTR_RXE_RRRD, @@ -139,7 +140,7 @@ enum { INSTR_SIY_IRD, 
INSTR_SIY_URD, INSTR_SI_URD, INSTR_SSE_RDRD, - INSTR_SSF_RRDRD, + INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, INSTR_S_00, INSTR_S_RD, @@ -152,7 +153,7 @@ struct operand { }; struct insn { - const char name[6]; + const char name[5]; unsigned char opfrag; unsigned char format; }; @@ -217,6 +218,7 @@ static const unsigned char formats[][7] = { [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, + [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, @@ -248,6 +250,7 @@ static const unsigned char formats[][7] = { [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, + [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, @@ -269,6 +272,7 @@ static const unsigned char formats[][7] = { [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, @@ -290,6 +294,7 @@ static const unsigned char formats[][7] = { [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SSF_RRDRD2]= { 0x00, R_8,D_20,B_16,D_36,B_32,0 }, [INSTR_SS_L0RDRD] = { 0xff, 
D_20,L8_8,B_16,D_36,B_32,0 }, [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, @@ -300,6 +305,36 @@ static const unsigned char formats[][7] = { [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, }; +enum { + LONG_INSN_ALGHSIK, + LONG_INSN_ALHSIK, + LONG_INSN_CLFHSI, + LONG_INSN_CLGFRL, + LONG_INSN_CLGHRL, + LONG_INSN_CLGHSI, + LONG_INSN_CLHHSI, + LONG_INSN_LLGFRL, + LONG_INSN_LLGHRL, + LONG_INSN_POPCNT, + LONG_INSN_RISBHG, + LONG_INSN_RISBLG, +}; + +static char *long_insn_name[] = { + [LONG_INSN_ALGHSIK] = "alghsik", + [LONG_INSN_ALHSIK] = "alhsik", + [LONG_INSN_CLFHSI] = "clfhsi", + [LONG_INSN_CLGFRL] = "clgfrl", + [LONG_INSN_CLGHRL] = "clghrl", + [LONG_INSN_CLGHSI] = "clghsi", + [LONG_INSN_CLHHSI] = "clhhsi", + [LONG_INSN_LLGFRL] = "llgfrl", + [LONG_INSN_LLGHRL] = "llghrl", + [LONG_INSN_POPCNT] = "popcnt", + [LONG_INSN_RISBHG] = "risbhg", + [LONG_INSN_RISBLG] = "risblg", +}; + static struct insn opcode[] = { #ifdef CONFIG_64BIT { "lmd", 0xef, INSTR_SS_RRRDRD3 }, @@ -881,6 +916,35 @@ static struct insn opcode_b9[] = { { "pfmf", 0xaf, INSTR_RRE_RR }, { "trte", 0xbf, INSTR_RRF_M0RR }, { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, + { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, + { "alhhh", 0xca, INSTR_RRF_R0RR2 }, + { "alhhl", 0xca, INSTR_RRF_R0RR2 }, + { "slhhh", 0xcb, INSTR_RRF_R0RR2 }, + { "chhr ", 0xcd, INSTR_RRE_RR }, + { "clhhr", 0xcf, INSTR_RRE_RR }, + { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, + { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, + { "slhhl", 0xdb, INSTR_RRF_R0RR2 }, + { "chlr", 0xdd, INSTR_RRE_RR }, + { "clhlr", 0xdf, INSTR_RRE_RR }, + { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, + { "locgr", 0xe2, INSTR_RRF_M0RR }, + { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, + { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, + { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, + { "agrk", 0xe8, INSTR_RRF_R0RR2 }, + { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, + { "algrk", 0xea, INSTR_RRF_R0RR2 }, + { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, + {
"locr", 0xf2, INSTR_RRF_M0RR }, + { "nrk", 0xf4, INSTR_RRF_R0RR2 }, + { "ork", 0xf6, INSTR_RRF_R0RR2 }, + { "xrk", 0xf7, INSTR_RRF_R0RR2 }, + { "ark", 0xf8, INSTR_RRF_R0RR2 }, + { "srk", 0xf9, INSTR_RRF_R0RR2 }, + { "alrk", 0xfa, INSTR_RRF_R0RR2 }, + { "slrk", 0xfb, INSTR_RRF_R0RR2 }, #endif { "kmac", 0x1e, INSTR_RRE_RR }, { "lrvr", 0x1f, INSTR_RRE_RR }, @@ -949,9 +1013,9 @@ static struct insn opcode_c4[] = { { "lgfrl", 0x0c, INSTR_RIL_RP }, { "lhrl", 0x05, INSTR_RIL_RP }, { "lghrl", 0x04, INSTR_RIL_RP }, - { "llgfrl", 0x0e, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, { "llhrl", 0x02, INSTR_RIL_RP }, - { "llghrl", 0x06, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, { "strl", 0x0f, INSTR_RIL_RP }, { "stgrl", 0x0b, INSTR_RIL_RP }, { "sthrl", 0x07, INSTR_RIL_RP }, @@ -968,9 +1032,9 @@ static struct insn opcode_c6[] = { { "cghrl", 0x04, INSTR_RIL_RP }, { "clrl", 0x0f, INSTR_RIL_RP }, { "clgrl", 0x0a, INSTR_RIL_RP }, - { "clgfrl", 0x0e, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, { "clhrl", 0x07, INSTR_RIL_RP }, - { "clghrl", 0x06, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, { "pfdrl", 0x02, INSTR_RIL_UP }, { "exrl", 0x00, INSTR_RIL_RP }, #endif @@ -982,6 +1046,20 @@ static struct insn opcode_c8[] = { { "mvcos", 0x00, INSTR_SSF_RRDRD }, { "ectg", 0x01, INSTR_SSF_RRDRD }, { "csst", 0x02, INSTR_SSF_RRDRD }, + { "lpd", 0x04, INSTR_SSF_RRDRD2 }, + { "lpdg ", 0x05, INSTR_SSF_RRDRD2 }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_cc[] = { +#ifdef CONFIG_64BIT + { "brcth", 0x06, INSTR_RIL_RP }, + { "aih", 0x08, INSTR_RIL_RI }, + { "alsih", 0x0a, INSTR_RIL_RI }, + { "alsih", 0x0b, INSTR_RIL_RI }, + { "cih", 0x0d, INSTR_RIL_RI }, + { "clih ", 0x0f, INSTR_RIL_RI }, #endif { "", 0, INSTR_INVALID } }; @@ -1063,6 +1141,16 @@ static struct insn opcode_e3[] = { { "mfy", 0x5c, INSTR_RXY_RRRD }, { "mhy", 0x7c, INSTR_RXY_RRRD }, { "pfd", 0x36, INSTR_RXY_URRD }, + { "lbh", 
0xc0, INSTR_RXY_RRRD }, + { "llch", 0xc2, INSTR_RXY_RRRD }, + { "stch", 0xc3, INSTR_RXY_RRRD }, + { "lhh", 0xc4, INSTR_RXY_RRRD }, + { "llhh", 0xc6, INSTR_RXY_RRRD }, + { "sthh", 0xc7, INSTR_RXY_RRRD }, + { "lfh", 0xca, INSTR_RXY_RRRD }, + { "stfh", 0xcb, INSTR_RXY_RRRD }, + { "chf", 0xcd, INSTR_RXY_RRRD }, + { "clhf", 0xcf, INSTR_RXY_RRRD }, #endif { "lrv", 0x1e, INSTR_RXY_RRRD }, { "lrvh", 0x1f, INSTR_RXY_RRRD }, @@ -1080,9 +1168,9 @@ static struct insn opcode_e5[] = { { "chhsi", 0x54, INSTR_SIL_RDI }, { "chsi", 0x5c, INSTR_SIL_RDI }, { "cghsi", 0x58, INSTR_SIL_RDI }, - { "clhhsi", 0x55, INSTR_SIL_RDU }, - { "clfhsi", 0x5d, INSTR_SIL_RDU }, - { "clghsi", 0x59, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, { "mvhhi", 0x44, INSTR_SIL_RDI }, { "mvhi", 0x4c, INSTR_SIL_RDI }, { "mvghi", 0x48, INSTR_SIL_RDI }, @@ -1137,6 +1225,24 @@ static struct insn opcode_eb[] = { { "alsi", 0x6e, INSTR_SIY_IRD }, { "algsi", 0x7e, INSTR_SIY_IRD }, { "ecag", 0x4c, INSTR_RSY_RRRD }, + { "srak", 0xdc, INSTR_RSY_RRRD }, + { "slak", 0xdd, INSTR_RSY_RRRD }, + { "srlk", 0xde, INSTR_RSY_RRRD }, + { "sllk", 0xdf, INSTR_RSY_RRRD }, + { "locg", 0xe2, INSTR_RSY_RDRM }, + { "stocg", 0xe3, INSTR_RSY_RDRM }, + { "lang", 0xe4, INSTR_RSY_RRRD }, + { "laog", 0xe6, INSTR_RSY_RRRD }, + { "laxg", 0xe7, INSTR_RSY_RRRD }, + { "laag", 0xe8, INSTR_RSY_RRRD }, + { "laalg", 0xea, INSTR_RSY_RRRD }, + { "loc", 0xf2, INSTR_RSY_RDRM }, + { "stoc", 0xf3, INSTR_RSY_RDRM }, + { "lan", 0xf4, INSTR_RSY_RRRD }, + { "lao", 0xf6, INSTR_RSY_RRRD }, + { "lax", 0xf7, INSTR_RSY_RRRD }, + { "laa", 0xf8, INSTR_RSY_RRRD }, + { "laal", 0xfa, INSTR_RSY_RRRD }, #endif { "rll", 0x1d, INSTR_RSY_RRRD }, { "mvclu", 0x8e, INSTR_RSY_RRRD }, @@ -1172,6 +1278,12 @@ static struct insn opcode_ec[] = { { "rxsbg", 0x57, INSTR_RIE_RRUUU }, { "rosbg", 0x56, INSTR_RIE_RRUUU }, { "risbg", 0x55, INSTR_RIE_RRUUU }, + { { 0, 
LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, + { "ahik", 0xd8, INSTR_RIE_RRI0 }, + { "aghik", 0xd9, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, #endif { "", 0, INSTR_INVALID } }; @@ -1321,6 +1433,9 @@ static struct insn *find_insn(unsigned char *code) case 0xc8: table = opcode_c8; break; + case 0xcc: + table = opcode_cc; + break; case 0xe3: table = opcode_e3; opfrag = code[5]; @@ -1367,7 +1482,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr = buffer; insn = find_insn(code); if (insn) { - ptr += sprintf(ptr, "%.5s\t", insn->name); + if (insn->name[0] == '\0') + ptr += sprintf(ptr, "%s\t", + long_insn_name[(int) insn->name[1]]); + else + ptr += sprintf(ptr, "%.5s\t", insn->name); /* Extract the operands. */ separator = 0; for (ops = formats[insn->format] + 1, i = 0; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c00856ad4e5..d149609e46e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -208,7 +208,8 @@ static noinline __init void init_kernel_storage_key(void) end_pfn = PFN_UP(__pa(&_end)); for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY, 0); } static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE); @@ -255,13 +256,35 @@ static noinline __init void setup_lowcore_early(void) s390_base_pgm_handler_fn = early_pgm_check_handler; } +static noinline __init void setup_facility_list(void) +{ + unsigned long nr; + + S390_lowcore.stfl_fac_list = 0; + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list)); + memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); + nr = 4; /* # bytes stored by stfl */ + if (test_facility(7)) { + /* More 
facility bits available with stfle */ + register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1; + asm volatile(".insn s,0xb2b00000,%0" /* stfle */ + : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0) + : : "cc"); + nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + } + memset((char *) S390_lowcore.stfle_fac_list + nr, 0, + MAX_FACILITY_BIT/8 - nr); +} + static noinline __init void setup_hpage(void) { #ifndef CONFIG_DEBUG_PAGEALLOC unsigned int facilities; - facilities = stfl(); - if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29))) + if (!test_facility(2) || !test_facility(8)) return; S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; __ctl_set_bit(0, 23); @@ -355,18 +378,15 @@ static __init void detect_diag44(void) static __init void detect_machine_facilities(void) { #ifdef CONFIG_64BIT - unsigned int facilities; - unsigned long long facility_bits; - - facilities = stfl(); - if (facilities & (1 << 28)) + if (test_facility(3)) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; - if (facilities & (1 << 23)) + if (test_facility(8)) S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; - if (facilities & (1 << 4)) + if (test_facility(11)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + if (test_facility(27)) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; - if ((stfle(&facility_bits, 1) > 0) && - (facility_bits & (1ULL << (63 - 40)))) + if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; #endif } @@ -447,6 +467,7 @@ void __init startup_init(void) lockdep_off(); sort_main_extable(); setup_lowcore_early(); + setup_facility_list(); detect_machine_type(); ipl_update_parameters(); setup_boot_command_line(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index bea9ee37ac9..5efce720298 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -72,25 +72,9 @@ STACK_SIZE = 1 << STACK_SHIFT l %r1,BASED(.Ltrace_irq_off_caller) basr %r14,%r1 .endm - - .macro TRACE_IRQS_CHECK_ON - tm SP_PSW(%r15),0x03 # irqs 
enabled? - bz BASED(0f) - TRACE_IRQS_ON -0: - .endm - - .macro TRACE_IRQS_CHECK_OFF - tm SP_PSW(%r15),0x03 # irqs enabled? - bz BASED(0f) - TRACE_IRQS_OFF -0: - .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK_ON -#define TRACE_IRQS_CHECK_OFF #endif #ifdef CONFIG_LOCKDEP @@ -198,6 +182,12 @@ STACK_SIZE = 1 << STACK_SHIFT lpsw \psworg # back to caller .endm + .macro REENABLE_IRQS + mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) + ni __SF_EMPTY(%r15),0xbf + ssm __SF_EMPTY(%r15) + .endm + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -264,12 +254,11 @@ sysc_do_svc: bnl BASED(sysc_nr_ok) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: - mvc SP_ARGS(4,%r15),SP_R7(%r15) -sysc_do_restart: sth %r7,SP_SVCNR(%r15) sll %r7,2 # svc number *4 l %r8,BASED(.Lsysc_table) tm __TI_flags+2(%r9),_TIF_SYSCALL + mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r8,0(%r7,%r8) # get system call addr. bnz BASED(sysc_tracesys) basr %r14,%r8 # call sys_xxxx @@ -357,7 +346,7 @@ sysc_restart: l %r7,SP_R2(%r15) # load new svc number mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument lm %r2,%r6,SP_R2(%r15) # load svc arguments - b BASED(sysc_do_restart) # restart svc + b BASED(sysc_nr_ok) # restart svc # # _TIF_SINGLE_STEP is set, call do_single_step @@ -390,6 +379,7 @@ sysc_tracesys: l %r8,0(%r7,%r8) sysc_tracego: lm %r3,%r6,SP_R3(%r15) + mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r2,SP_ORIG_R2(%r15) basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value @@ -440,13 +430,11 @@ kernel_execve: br %r14 # execve succeeded. 
0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts - TRACE_IRQS_OFF l %r15,__LC_KERNEL_STACK # load ksp s %r15,BASED(.Lc_spsize) # make room for registers & psw l %r9,__LC_THREAD_INFO mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts l %r1,BASED(.Lexecve_tail) basr %r14,%r1 @@ -483,9 +471,10 @@ pgm_check_handler: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: - TRACE_IRQS_CHECK_OFF l %r9,__LC_THREAD_INFO # load pointer to thread_info struct l %r3,__LC_PGM_ILC # load program interruption code + l %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS la %r8,0x7f nr %r8,%r3 pgm_do_call: @@ -495,7 +484,6 @@ pgm_do_call: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r7 # branch to interrupt-handler pgm_exit: - TRACE_IRQS_CHECK_ON b BASED(sysc_return) # @@ -523,7 +511,6 @@ pgm_per_std: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: - TRACE_IRQS_CHECK_OFF l %r9,__LC_THREAD_INFO # load pointer to thread_info struct l %r1,__TI_task(%r9) tm SP_PSW+1(%r15),0x01 # kernel per event ? @@ -533,6 +520,8 @@ pgm_no_vtime2: mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP l %r3,__LC_PGM_ILC # load program interruption code + l %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS la %r8,0x7f nr %r8,%r3 # clear per-event-bit and ilc be BASED(pgm_exit2) # only per or per+check ? 
@@ -542,8 +531,6 @@ pgm_no_vtime2: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r7 # branch to interrupt-handler pgm_exit2: - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts b BASED(sysc_return) # @@ -557,13 +544,11 @@ pgm_svcper: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER lh %r7,0x8a # get svc number from lowcore l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF l %r8,__TI_task(%r9) mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID mvc __THREAD_per+__PER_address(4,%r8),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) @@ -737,7 +722,8 @@ ext_no_vtime: l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area - lh %r3,__LC_EXT_INT_CODE # get interruption code + l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) basr %r14,%r1 b BASED(io_return) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index ff579b6bde0..95c1dfc4ef3 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,7 +5,7 @@ #include <linux/signal.h> #include <asm/ptrace.h> -typedef void pgm_check_handler_t(struct pt_regs *, long); +typedef void pgm_check_handler_t(struct pt_regs *, long, unsigned long); extern pgm_check_handler_t *pgm_check_table[128]; pgm_check_handler_t do_protection_exception; pgm_check_handler_t do_dat_exception; @@ -19,7 +19,7 @@ void do_signal(struct pt_regs *regs); int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); -void do_extint(struct pt_regs *regs, unsigned short code); +void do_extint(struct pt_regs *regs, unsigned int, unsigned 
int, unsigned long); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void die(const char * str, struct pt_regs * regs, long err); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 8bccec15ea9..a2be23922f4 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -79,25 +79,9 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ basr %r2,%r0 brasl %r14,trace_hardirqs_off_caller .endm - - .macro TRACE_IRQS_CHECK_ON - tm SP_PSW(%r15),0x03 # irqs enabled? - jz 0f - TRACE_IRQS_ON -0: - .endm - - .macro TRACE_IRQS_CHECK_OFF - tm SP_PSW(%r15),0x03 # irqs enabled? - jz 0f - TRACE_IRQS_OFF -0: - .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK_ON -#define TRACE_IRQS_CHECK_OFF #endif #ifdef CONFIG_LOCKDEP @@ -207,6 +191,12 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ 0: .endm + .macro REENABLE_IRQS + mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) + ni __SF_EMPTY(%r15),0xbf + ssm __SF_EMPTY(%r15) + .endm + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -256,7 +246,6 @@ sysc_saveall: CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_ILC(4,%r15),__LC_SVC_ILC - stg %r7,SP_ARGS(%r15) lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -284,6 +273,7 @@ sysc_nr_ok: sysc_noemu: #endif tm __TI_flags+6(%r12),_TIF_SYSCALL + mvc SP_ARGS(8,%r15),SP_R7(%r15) lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys basr %r14,%r8 # call sys_xxxx @@ -397,6 +387,7 @@ sysc_tracesys: lgf %r8,0(%r7,%r10) sysc_tracego: lmg %r3,%r6,SP_R3(%r15) + mvc SP_ARGS(8,%r15),SP_R7(%r15) lg %r2,SP_ORIG_R2(%r15) basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value @@ -443,14 +434,12 @@ kernel_execve: br %r14 # execve succeeded. 
0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts -# TRACE_IRQS_OFF lg %r15,__LC_KERNEL_STACK # load ksp aghi %r15,-SP_SIZE # make room for registers & psw lg %r13,__LC_SVC_NEW_PSW+8 mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs lg %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -# TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts brasl %r14,execve_tail j sysc_return @@ -490,19 +479,18 @@ pgm_check_handler: LAST_BREAK pgm_no_vtime: HANDLE_SIE_INTERCEPT - TRACE_IRQS_CHECK_OFF stg %r11,SP_ARGS(%r15) lgf %r3,__LC_PGM_ILC # load program interruption code + lg %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS lghi %r8,0x7f ngr %r8,%r3 -pgm_do_call: sll %r8,3 larl %r1,pgm_check_table lg %r1,0(%r8,%r1) # load address of handler routine la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to interrupt-handler pgm_exit: - TRACE_IRQS_CHECK_ON j sysc_return # @@ -533,7 +521,6 @@ pgm_per_std: LAST_BREAK pgm_no_vtime2: HANDLE_SIE_INTERCEPT - TRACE_IRQS_CHECK_OFF lg %r1,__TI_task(%r12) tm SP_PSW+1(%r15),0x01 # kernel per event ? 
jz kernel_per @@ -542,6 +529,8 @@ pgm_no_vtime2: mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP lgf %r3,__LC_PGM_ILC # load program interruption code + lg %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS lghi %r8,0x7f ngr %r8,%r3 # clear per-event-bit and ilc je pgm_exit2 @@ -551,8 +540,6 @@ pgm_no_vtime2: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to interrupt-handler pgm_exit2: - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts j sysc_return # @@ -568,13 +555,11 @@ pgm_svcper: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK - TRACE_IRQS_OFF lg %r8,__TI_task(%r12) mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID mvc __THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc @@ -743,8 +728,11 @@ ext_int_handler: ext_no_vtime: HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF + lghi %r1,4096 la %r2,SP_PTREGS(%r15) # address of register-save area - llgh %r3,__LC_EXT_INT_CODE # get interruption code + llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + llgf %r4,__LC_EXT_PARAMS # get external parameter + lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter brasl %r14,do_extint j io_return @@ -966,7 +954,6 @@ cleanup_system_call: CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_ILC(4,%r15),__LC_SVC_ILC - stg %r7,SP_ARGS(%r15) mvc 8(8,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index db1696e210a..7061398341d 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -488,7 
+488,9 @@ startup: .align 16 2: .long 0x000a0000,0x8badcccc #if defined(CONFIG_64BIT) -#if defined(CONFIG_MARCH_Z10) +#if defined(CONFIG_MARCH_Z196) + .long 0xc100efe3, 0xf46c0000 +#elif defined(CONFIG_MARCH_Z10) .long 0xc100efe3, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) .long 0xc100efc3, 0x00000000 @@ -498,7 +500,9 @@ startup: .long 0xc0000000, 0x00000000 #endif #else -#if defined(CONFIG_MARCH_Z10) +#if defined(CONFIG_MARCH_Z196) + .long 0x8100c880, 0x00000000 +#elif defined(CONFIG_MARCH_Z10) .long 0x8100c880, 0x00000000 #elif defined(CONFIG_MARCH_Z9_109) .long 0x8100c880, 0x00000000 diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 559af0d0787..0fbe4e32f7b 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -54,11 +54,11 @@ void detect_memory_layout(struct mem_chunk chunk[]) * right thing and we don't get scheduled away with low address * protection disabled. */ - flags = __raw_local_irq_stnsm(0xf8); + flags = __arch_local_irq_stnsm(0xf8); __ctl_store(cr0, 0, 0); __ctl_clear_bit(0, 28); find_memory_chunks(chunk); __ctl_load(cr0, 0, 0); - __raw_local_irq_ssm(flags); + arch_local_irq_restore(flags); } EXPORT_SYMBOL(detect_memory_layout); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index d3a2d1c6438..ec2e03b22ea 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -76,17 +76,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk) static void default_idle(void) { /* CPU is going idle. 
*/ - local_irq_disable(); - if (need_resched()) { - local_irq_enable(); - return; - } #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) { preempt_enable_no_resched(); cpu_die(); } #endif + local_irq_disable(); + if (need_resched()) { + local_irq_enable(); + return; + } local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index ecb2d02b02e..644548e615c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -42,7 +42,7 @@ void __cpuinit print_cpu_info(void) struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); pr_info("Processor %d started, address %d, identification %06X\n", - S390_lowcore.cpu_nr, S390_lowcore.cpu_addr, id->ident); + S390_lowcore.cpu_nr, stap(), id->ident); } /* diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index 9ce641b5291..bd1db508e8a 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -113,12 +113,15 @@ int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler, return 0; } -void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) +void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { + struct pt_regs *old_regs; + unsigned short code; ext_int_info_t *p; int index; - struct pt_regs *old_regs; + code = (unsigned short) ext_int_code; old_regs = set_irq_regs(regs); s390_idle_check(regs, S390_lowcore.int_clock, S390_lowcore.async_enter_timer); @@ -132,7 +135,7 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) index = ext_hash(code); for (p = ext_int_hash[index]; p; p = p->next) { if (likely(p->code == code)) - p->handler(code); + p->handler(ext_int_code, param32, param64); } irq_exit(); set_irq_regs(old_regs); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c8e8e1354e1..e3ceb911dc7 100644 --- 
a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -409,6 +409,9 @@ setup_lowcore(void) lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; lc->machine_flags = S390_lowcore.machine_flags; + lc->stfl_fac_list = S390_lowcore.stfl_fac_list; + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lc->extended_save_area_addr = (__u32) @@ -627,7 +630,8 @@ setup_memory(void) add_active_range(0, start_chunk, end_chunk); pfn = max(start_chunk, start_pfn); for (; pfn < end_chunk; pfn++) - page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); + page_set_storage_key(PFN_PHYS(pfn), + PAGE_DEFAULT_KEY, 0); } psw_set_key(PAGE_DEFAULT_KEY); @@ -674,12 +678,9 @@ setup_memory(void) static void __init setup_hwcaps(void) { static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - unsigned long long facility_list_extended; - unsigned int facility_list; struct cpuid cpu_id; int i; - facility_list = stfl(); /* * The store facility list bits numbers as found in the principles * of operation are numbered with bit 1UL<<31 as number 0 to @@ -699,11 +700,10 @@ static void __init setup_hwcaps(void) * HWCAP_S390_ETF3EH bit 8 (22 && 30). */ for (i = 0; i < 6; i++) - if (facility_list & (1UL << (31 - stfl_bits[i]))) + if (test_facility(stfl_bits[i])) elf_hwcap |= 1UL << i; - if ((facility_list & (1UL << (31 - 22))) - && (facility_list & (1UL << (31 - 30)))) + if (test_facility(22) && test_facility(30)) elf_hwcap |= HWCAP_S390_ETF3EH; /* @@ -719,12 +719,8 @@ static void __init setup_hwcaps(void) * translated to: * HWCAP_S390_DFP bit 6 (42 && 44). 
*/ - if ((elf_hwcap & (1UL << 2)) && - __stfle(&facility_list_extended, 1) > 0) { - if ((facility_list_extended & (1ULL << (63 - 42))) - && (facility_list_extended & (1ULL << (63 - 44)))) - elf_hwcap |= HWCAP_S390_DFP; - } + if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_S390_DFP; /* * Huge page support HWCAP_S390_HPAGE is bit 7. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8127ebd59c4..94cf510b8fe 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -156,7 +156,8 @@ void smp_send_stop(void) * cpus are handled. */ -static void do_ext_call_interrupt(__u16 code) +static void do_ext_call_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { unsigned long bits; @@ -593,6 +594,8 @@ int __cpuinit __cpu_up(unsigned int cpu) cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; cpu_lowcore->machine_flags = S390_lowcore.machine_flags; cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func; + memcpy(cpu_lowcore->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); eieio(); while (sigp(cpu, sigp_restart) == sigp_busy) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index a0ffc7717ed..f04d93aa48e 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -15,6 +15,7 @@ #include <asm/ebcdic.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> +#include <asm/topology.h> /* Sigh, math-emu. Don't ask. */ #include <asm/sfp-util.h> @@ -74,6 +75,42 @@ static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) "Model Temp. Capacity: %-16.16s %08u\n", info->model_temp_cap, *(u32 *) info->model_temp_cap_rating); + if (info->cai) { + len += sprintf(page + len, + "Capacity Adj. Ind.: %d\n", + info->cai); + len += sprintf(page + len, "Capacity Ch. 
Reason: %d\n", + info->ccr); + } + return len; +} + +static int stsi_15_1_x(struct sysinfo_15_1_x *info, char *page, int len) +{ + static int max_mnest; + int i, rc; + + len += sprintf(page + len, "\n"); + if (!MACHINE_HAS_TOPOLOGY) + return len; + if (max_mnest) { + stsi(info, 15, 1, max_mnest); + } else { + for (max_mnest = 6; max_mnest > 1; max_mnest--) { + rc = stsi(info, 15, 1, max_mnest); + if (rc != -ENOSYS) + break; + } + } + len += sprintf(page + len, "CPU Topology HW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + len += sprintf(page + len, " %d", info->mag[i]); + len += sprintf(page + len, "\n"); + store_topology(info); + len += sprintf(page + len, "CPU Topology SW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + len += sprintf(page + len, " %d", info->mag[i]); + len += sprintf(page + len, "\n"); return len; } @@ -87,7 +124,6 @@ static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len) ext = (struct sysinfo_1_2_2_extension *) ((unsigned long) info + info->acc_offset); - len += sprintf(page + len, "\n"); len += sprintf(page + len, "CPUs Total: %d\n", info->cpus_total); len += sprintf(page + len, "CPUs Configured: %d\n", @@ -217,6 +253,9 @@ static int proc_read_sysinfo(char *page, char **start, len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len); if (level >= 1) + len = stsi_15_1_x((struct sysinfo_15_1_x *) info, page, len); + + if (level >= 1) len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len); if (level >= 2) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 2896cac9c14..f754a6dc4f9 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -155,7 +155,9 @@ void init_cpu_timer(void) __ctl_set_bit(0, 4); } -static void clock_comparator_interrupt(__u16 code) +static void clock_comparator_interrupt(unsigned int ext_int_code, + unsigned int param32, + unsigned long param64) { if (S390_lowcore.clock_comparator == -1ULL) set_clock_comparator(S390_lowcore.clock_comparator); @@ -164,14 +166,13 @@ static void 
clock_comparator_interrupt(__u16 code) static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); -static void timing_alert_interrupt(__u16 code) +static void timing_alert_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { - if (S390_lowcore.ext_params & 0x00c40000) - etr_timing_alert((struct etr_irq_parm *) - &S390_lowcore.ext_params); - if (S390_lowcore.ext_params & 0x00038000) - stp_timing_alert((struct stp_irq_parm *) - &S390_lowcore.ext_params); + if (param32 & 0x00c40000) + etr_timing_alert((struct etr_irq_parm *) &param32); + if (param32 & 0x00038000) + stp_timing_alert((struct stp_irq_parm *) &param32); } static void etr_reset(void); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bcef00766a6..a9dee9048ee 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -18,158 +18,138 @@ #include <linux/cpuset.h> #include <asm/delay.h> #include <asm/s390_ext.h> -#include <asm/sysinfo.h> - -#define CPU_BITS 64 -#define NR_MAG 6 #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) #define PTF_CHECK (2UL) -struct tl_cpu { - unsigned char reserved0[4]; - unsigned char :6; - unsigned char pp:2; - unsigned char reserved1; - unsigned short origin; - unsigned long mask[CPU_BITS / BITS_PER_LONG]; -}; - -struct tl_container { - unsigned char reserved[7]; - unsigned char id; -}; - -union tl_entry { - unsigned char nl; - struct tl_cpu cpu; - struct tl_container container; -}; - -struct tl_info { - unsigned char reserved0[2]; - unsigned short length; - unsigned char mag[NR_MAG]; - unsigned char reserved1; - unsigned char mnest; - unsigned char reserved2[4]; - union tl_entry tle[0]; -}; - -struct core_info { - struct core_info *next; +struct mask_info { + struct mask_info *next; unsigned char id; cpumask_t mask; }; -static int topology_enabled; +static int topology_enabled = 1; static void topology_work_fn(struct work_struct *work); -static struct
tl_info *tl_info; -static struct core_info core_info; -static int machine_has_topology; +static struct sysinfo_15_1_x *tl_info; static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); /* topology_lock protects the core linked list */ static DEFINE_SPINLOCK(topology_lock); +static struct mask_info core_info; cpumask_t cpu_core_map[NR_CPUS]; unsigned char cpu_core_id[NR_CPUS]; -static cpumask_t cpu_coregroup_map(unsigned int cpu) +#ifdef CONFIG_SCHED_BOOK +static struct mask_info book_info; +cpumask_t cpu_book_map[NR_CPUS]; +unsigned char cpu_book_id[NR_CPUS]; +#endif + +static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { - struct core_info *core = &core_info; - unsigned long flags; cpumask_t mask; cpus_clear(mask); - if (!topology_enabled || !machine_has_topology) + if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) return cpu_possible_map; - spin_lock_irqsave(&topology_lock, flags); - while (core) { - if (cpu_isset(cpu, core->mask)) { - mask = core->mask; + while (info) { + if (cpu_isset(cpu, info->mask)) { + mask = info->mask; break; } - core = core->next; + info = info->next; } - spin_unlock_irqrestore(&topology_lock, flags); if (cpus_empty(mask)) mask = cpumask_of_cpu(cpu); return mask; } -const struct cpumask *cpu_coregroup_mask(unsigned int cpu) -{ - return &cpu_core_map[cpu]; -} - -static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) +static void add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, struct mask_info *core) { unsigned int cpu; - for (cpu = find_first_bit(&tl_cpu->mask[0], CPU_BITS); - cpu < CPU_BITS; - cpu = find_next_bit(&tl_cpu->mask[0], CPU_BITS, cpu + 1)) + for (cpu = find_first_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS); + cpu < TOPOLOGY_CPU_BITS; + cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1)) { unsigned int rcpu, lcpu; - rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin; + rcpu = 
TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; for_each_present_cpu(lcpu) { - if (cpu_logical_map(lcpu) == rcpu) { - cpu_set(lcpu, core->mask); - cpu_core_id[lcpu] = core->id; - smp_cpu_polarization[lcpu] = tl_cpu->pp; - } + if (cpu_logical_map(lcpu) != rcpu) + continue; +#ifdef CONFIG_SCHED_BOOK + cpu_set(lcpu, book->mask); + cpu_book_id[lcpu] = book->id; +#endif + cpu_set(lcpu, core->mask); + cpu_core_id[lcpu] = core->id; + smp_cpu_polarization[lcpu] = tl_cpu->pp; } } } -static void clear_cores(void) +static void clear_masks(void) { - struct core_info *core = &core_info; + struct mask_info *info; - while (core) { - cpus_clear(core->mask); - core = core->next; + info = &core_info; + while (info) { + cpus_clear(info->mask); + info = info->next; } +#ifdef CONFIG_SCHED_BOOK + info = &book_info; + while (info) { + cpus_clear(info->mask); + info = info->next; + } +#endif } -static union tl_entry *next_tle(union tl_entry *tle) +static union topology_entry *next_tle(union topology_entry *tle) { - if (tle->nl) - return (union tl_entry *)((struct tl_container *)tle + 1); - else - return (union tl_entry *)((struct tl_cpu *)tle + 1); + if (!tle->nl) + return (union topology_entry *)((struct topology_cpu *)tle + 1); + return (union topology_entry *)((struct topology_container *)tle + 1); } -static void tl_to_cores(struct tl_info *info) +static void tl_to_cores(struct sysinfo_15_1_x *info) { - union tl_entry *tle, *end; - struct core_info *core = &core_info; +#ifdef CONFIG_SCHED_BOOK + struct mask_info *book = &book_info; +#else + struct mask_info *book = NULL; +#endif + struct mask_info *core = &core_info; + union topology_entry *tle, *end; + spin_lock_irq(&topology_lock); - clear_cores(); + clear_masks(); tle = info->tle; - end = (union tl_entry *)((unsigned long)info + info->length); + end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { - case 5: - case 4: - case 3: +#ifdef CONFIG_SCHED_BOOK case 2: + book = book->next; 
+ book->id = tle->container.id; break; +#endif case 1: core = core->next; core->id = tle->container.id; break; case 0: - add_cpus_to_core(&tle->cpu, core); + add_cpus_to_mask(&tle->cpu, book, core); break; default: - clear_cores(); - machine_has_topology = 0; + clear_masks(); goto out; } tle = next_tle(tle); @@ -206,7 +186,7 @@ int topology_set_cpu_management(int fc) int cpu; int rc; - if (!machine_has_topology) + if (!MACHINE_HAS_TOPOLOGY) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -221,24 +201,43 @@ int topology_set_cpu_management(int fc) static void update_cpu_core_map(void) { + unsigned long flags; int cpu; - for_each_possible_cpu(cpu) - cpu_core_map[cpu] = cpu_coregroup_map(cpu); + spin_lock_irqsave(&topology_lock, flags); + for_each_possible_cpu(cpu) { + cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); +#ifdef CONFIG_SCHED_BOOK + cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); +#endif + } + spin_unlock_irqrestore(&topology_lock, flags); +} + +void store_topology(struct sysinfo_15_1_x *info) +{ +#ifdef CONFIG_SCHED_BOOK + int rc; + + rc = stsi(info, 15, 1, 3); + if (rc != -ENOSYS) + return; +#endif + stsi(info, 15, 1, 2); } int arch_update_cpu_topology(void) { - struct tl_info *info = tl_info; + struct sysinfo_15_1_x *info = tl_info; struct sys_device *sysdev; int cpu; - if (!machine_has_topology) { + if (!MACHINE_HAS_TOPOLOGY) { update_cpu_core_map(); topology_update_polarization_simple(); return 0; } - stsi(info, 15, 1, 2); + store_topology(info); tl_to_cores(info); update_cpu_core_map(); for_each_online_cpu(cpu) { @@ -275,9 +274,9 @@ static void set_topology_timer(void) static int __init early_parse_topology(char *p) { - if (strncmp(p, "on", 2)) + if (strncmp(p, "off", 3)) return 0; - topology_enabled = 1; + topology_enabled = 0; return 0; } early_param("topology", early_parse_topology); @@ -287,7 +286,7 @@ static int __init init_topology_update(void) int rc; rc = 0; - if (!machine_has_topology) { + if (!MACHINE_HAS_TOPOLOGY) { 
topology_update_polarization_simple(); goto out; } @@ -299,41 +298,37 @@ out: } __initcall(init_topology_update); +static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, + int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = alloc_bootmem(sizeof(struct mask_info)); + mask = mask->next; + } +} + void __init s390_init_cpu_topology(void) { - unsigned long long facility_bits; - struct tl_info *info; - struct core_info *core; - int nr_cores; + struct sysinfo_15_1_x *info; int i; - if (stfle(&facility_bits, 1) <= 0) - return; - if (!(facility_bits & (1ULL << 52)) || !(facility_bits & (1ULL << 61))) + if (!MACHINE_HAS_TOPOLOGY) return; - machine_has_topology = 1; - tl_info = alloc_bootmem_pages(PAGE_SIZE); info = tl_info; - stsi(info, 15, 1, 2); - - nr_cores = info->mag[NR_MAG - 2]; - for (i = 0; i < info->mnest - 2; i++) - nr_cores *= info->mag[NR_MAG - 3 - i]; - + store_topology(info); pr_info("The CPU configuration topology of the machine is:"); - for (i = 0; i < NR_MAG; i++) + for (i = 0; i < TOPOLOGY_NR_MAG; i++) printk(" %d", info->mag[i]); printk(" / %d\n", info->mnest); - - core = &core_info; - for (i = 0; i < nr_cores; i++) { - core->next = alloc_bootmem(sizeof(struct core_info)); - core = core->next; - if (!core) - goto error; - } - return; -error: - machine_has_topology = 0; + alloc_masks(info, &core_info, 2); +#ifdef CONFIG_SCHED_BOOK + alloc_masks(info, &book_info, 3); +#endif } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 5d8f0f3d025..70640822621 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -329,27 +329,19 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static void __kprobes inline do_trap(long interruption_code, int signr, - char *str, struct pt_regs *regs, - 
siginfo_t *info) +static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, + struct pt_regs *regs, siginfo_t *info) { - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); - - if (notify_die(DIE_TRAP, str, regs, interruption_code, - interruption_code, signr) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, pgm_int_code, + pgm_int_code, signr) == NOTIFY_STOP) return; if (regs->psw.mask & PSW_MASK_PSTATE) { struct task_struct *tsk = current; - tsk->thread.trap_no = interruption_code & 0xffff; + tsk->thread.trap_no = pgm_int_code & 0xffff; force_sig_info(signr, info, tsk); - report_user_fault(regs, interruption_code, signr); + report_user_fault(regs, pgm_int_code, signr); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); @@ -361,14 +353,16 @@ static void __kprobes inline do_trap(long interruption_code, int signr, btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); if (btt == BUG_TRAP_TYPE_WARN) return; - die(str, regs, interruption_code); + die(str, regs, pgm_int_code); } } } -static inline void __user *get_check_address(struct pt_regs *regs) +static inline void __user *get_psw_address(struct pt_regs *regs, + long pgm_int_code) { - return (void __user *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN); + return (void __user *) + ((regs->psw.addr - (pgm_int_code >> 16)) & PSW_ADDR_INSN); } void __kprobes do_single_step(struct pt_regs *regs) @@ -381,57 +375,57 @@ void __kprobes do_single_step(struct pt_regs *regs) force_sig(SIGTRAP, current); } -static void default_trap_handler(struct pt_regs * regs, long interruption_code) +static void default_trap_handler(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { if (regs->psw.mask & PSW_MASK_PSTATE) { - local_irq_enable(); - report_user_fault(regs, interruption_code, SIGSEGV); + 
report_user_fault(regs, pgm_int_code, SIGSEGV); do_exit(SIGSEGV); } else - die("Unknown program exception", regs, interruption_code); + die("Unknown program exception", regs, pgm_int_code); } -#define DO_ERROR_INFO(signr, str, name, sicode, siaddr) \ -static void name(struct pt_regs * regs, long interruption_code) \ +#define DO_ERROR_INFO(name, signr, sicode, str) \ +static void name(struct pt_regs *regs, long pgm_int_code, \ + unsigned long trans_exc_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ - info.si_addr = siaddr; \ - do_trap(interruption_code, signr, str, regs, &info); \ + info.si_addr = get_psw_address(regs, pgm_int_code); \ + do_trap(pgm_int_code, signr, str, regs, &info); \ } -DO_ERROR_INFO(SIGILL, "addressing exception", addressing_exception, - ILL_ILLADR, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "execute exception", execute_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint divide exception", divide_exception, - FPE_INTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint overflow exception", overflow_exception, - FPE_INTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP overflow exception", hfp_overflow_exception, - FPE_FLTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP underflow exception", hfp_underflow_exception, - FPE_FLTUND, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP significance exception", hfp_significance_exception, - FPE_FLTRES, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP divide exception", hfp_divide_exception, - FPE_FLTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP square root exception", hfp_sqrt_exception, - FPE_FLTINV, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "operand exception", operand_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "privileged operation", privileged_op, - ILL_PRVOPC, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "special operation 
exception", special_op_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "translation exception", translation_exception, - ILL_ILLOPN, get_check_address(regs)) - -static inline void -do_fp_trap(struct pt_regs *regs, void __user *location, - int fpc, long interruption_code) +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, + "addressing exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, + "execute exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, + "fixpoint divide exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, + "fixpoint overflow exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, + "HFP overflow exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, + "HFP underflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, + "HFP significance exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, + "HFP divide exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, + "HFP square root exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, + "operand exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, + "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, + "special operation exception") +DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, + "translation exception") + +static inline void do_fp_trap(struct pt_regs *regs, void __user *location, + int fpc, long pgm_int_code) { siginfo_t si; @@ -453,26 +447,19 @@ do_fp_trap(struct pt_regs *regs, void __user *location, else if (fpc & 0x0800) /* inexact */ si.si_code = FPE_FLTRES; } - current->thread.ieee_instruction_pointer = (addr_t) location; - do_trap(interruption_code, SIGFPE, + do_trap(pgm_int_code, SIGFPE, "floating point exception", regs, &si); } -static void illegal_op(struct pt_regs * regs, long interruption_code) +static void illegal_op(struct pt_regs *regs, long pgm_int_code, + unsigned long 
trans_exc_code) { siginfo_t info; __u8 opcode[6]; __u16 __user *location; int signal = 0; - location = get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_psw_address(regs, pgm_int_code); if (regs->psw.mask & PSW_MASK_PSTATE) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) @@ -512,7 +499,7 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) * If we get an illegal op in kernel mode, send it through the * kprobes notifier. If kprobes doesn't pick it up, SIGILL */ - if (notify_die(DIE_BPT, "bpt", regs, interruption_code, + if (notify_die(DIE_BPT, "bpt", regs, pgm_int_code, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } @@ -520,13 +507,13 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) #ifdef CONFIG_MATHEMU if (signal == SIGFPE) do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); + current->thread.fp_regs.fpc, pgm_int_code); else if (signal == SIGSEGV) { info.si_signo = signal; info.si_errno = 0; info.si_code = SEGV_MAPERR; info.si_addr = (void __user *) location; - do_trap(interruption_code, signal, + do_trap(pgm_int_code, signal, "user address fault", regs, &info); } else #endif @@ -535,28 +522,22 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) info.si_errno = 0; info.si_code = ILL_ILLOPC; info.si_addr = (void __user *) location; - do_trap(interruption_code, signal, + do_trap(pgm_int_code, signal, "illegal operation", regs, &info); } } #ifdef CONFIG_MATHEMU -asmlinkage void -specification_exception(struct pt_regs * regs, long interruption_code) +asmlinkage void specification_exception(struct pt_regs *regs, + long pgm_int_code, + unsigned long trans_exc_code) { __u8 opcode[6]; __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_check_address(regs); - - /* - * We got all 
needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = (__u16 __user *) get_psw_address(regs, pgm_int_code); if (regs->psw.mask & PSW_MASK_PSTATE) { get_user(*((__u16 *) opcode), location); @@ -592,35 +573,29 @@ specification_exception(struct pt_regs * regs, long interruption_code) if (signal == SIGFPE) do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); + current->thread.fp_regs.fpc, pgm_int_code); else if (signal) { siginfo_t info; info.si_signo = signal; info.si_errno = 0; info.si_code = ILL_ILLOPN; info.si_addr = location; - do_trap(interruption_code, signal, + do_trap(pgm_int_code, signal, "specification exception", regs, &info); } } #else -DO_ERROR_INFO(SIGILL, "specification exception", specification_exception, - ILL_ILLOPN, get_check_address(regs)); +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, + "specification exception"); #endif -static void data_exception(struct pt_regs * regs, long interruption_code) +static void data_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { __u16 __user *location; int signal = 0; - location = get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. 
- */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_psw_address(regs, pgm_int_code); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); @@ -686,19 +661,19 @@ static void data_exception(struct pt_regs * regs, long interruption_code) signal = SIGILL; if (signal == SIGFPE) do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); + current->thread.fp_regs.fpc, pgm_int_code); else if (signal) { siginfo_t info; info.si_signo = signal; info.si_errno = 0; info.si_code = ILL_ILLOPN; info.si_addr = location; - do_trap(interruption_code, signal, - "data exception", regs, &info); + do_trap(pgm_int_code, signal, "data exception", regs, &info); } } -static void space_switch_exception(struct pt_regs * regs, long int_code) +static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { siginfo_t info; @@ -709,8 +684,8 @@ static void space_switch_exception(struct pt_regs * regs, long int_code) info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_PRVOPC; - info.si_addr = get_check_address(regs); - do_trap(int_code, SIGILL, "space switch event", regs, &info); + info.si_addr = get_psw_address(regs, pgm_int_code); + do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info); } asmlinkage void kernel_stack_overflow(struct pt_regs * regs) diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 6b83870507d..e3150dd2fe7 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -84,11 +84,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; */ static void vdso_init_data(struct vdso_data *vd) { - unsigned int facility_list; - - facility_list = stfl(); - vd->ectg_available = - user_mode != HOME_SPACE_MODE && (facility_list & 1); + vd->ectg_available = user_mode != HOME_SPACE_MODE && test_facility(31); } #ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 3479f1b0d4e..56c8687b29b 
100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -314,7 +314,8 @@ static void do_callbacks(struct list_head *cb_list) /* * Handler for the virtual CPU timer. */ -static void do_cpu_timer_interrupt(__u16 error_code) +static void do_cpu_timer_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct vtimer_queue *vq; struct vtimer_list *event, *tmp; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4fe68650535..985d825494f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -740,8 +740,8 @@ static int __init kvm_s390_init(void) kvm_exit(); return -ENOMEM; } - stfle(facilities, 1); - facilities[0] &= 0xff00fff3f0700000ULL; + memcpy(facilities, S390_lowcore.stfle_fac_list, 16); + facilities[0] &= 0xff00fff3f47c0000ULL; return 0; } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 44205507717..9194a4b52b2 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -154,12 +154,12 @@ static int handle_chsc(struct kvm_vcpu *vcpu) static int handle_stfl(struct kvm_vcpu *vcpu) { - unsigned int facility_list = stfl(); + unsigned int facility_list; int rc; vcpu->stat.instruction_stfl++; /* only pass the facility bits, which we can handle */ - facility_list &= 0xff00fff3; + facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index eec05448441..6fbc6f3fbdf 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,6 +3,6 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ - page-states.o + page-states.o gup.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index a9550dca3e4..c66ffd8dbbb 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -23,7 +23,10 @@ #include <asm/pgalloc.h> 
#include <asm/diag.h> -static char *sender = "VMRMSVM"; +#ifdef CONFIG_CMM_IUCV +static char *cmm_default_sender = "VMRMSVM"; +#endif +static char *sender; module_param(sender, charp, 0400); MODULE_PARM_DESC(sender, "Guest name that may send SMSG messages (default VMRMSVM)"); @@ -440,6 +443,8 @@ static int __init cmm_init(void) int len = strlen(sender); while (len--) sender[len] = toupper(sender[len]); + } else { + sender = cmm_default_sender; } rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2505b2ea0ef..fe5701e9efb 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -52,6 +52,14 @@ #define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADACCESS 0x040000 +static unsigned long store_indication; + +void fault_init(void) +{ + if (test_facility(2) && test_facility(75)) + store_indication = 0xc00; +} + static inline int notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -199,14 +207,21 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, unsigned long trans_exc_code) { struct task_struct *tsk = current; + unsigned long address; + struct siginfo si; /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; + address = trans_exc_code & __FAIL_ADDR_MASK; + tsk->thread.prot_addr = address; tsk->thread.trap_no = int_code; - force_sig(SIGBUS, tsk); + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRERR; + si.si_addr = (void __user *) address; + force_sig_info(SIGBUS, &si, tsk); } #ifdef CONFIG_S390_EXEC_PROTECT @@ -266,10 +281,11 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, if (fault & VM_FAULT_OOM) pagefault_out_of_memory(); else if (fault & VM_FAULT_SIGBUS) { - do_sigbus(regs, int_code, trans_exc_code); /* Kernel mode? 
Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) do_no_context(regs, int_code, trans_exc_code); + else + do_sigbus(regs, int_code, trans_exc_code); } else BUG(); break; @@ -294,7 +310,7 @@ static inline int do_exception(struct pt_regs *regs, int access, struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int fault; + int fault, write; if (notify_page_fault(regs)) return 0; @@ -312,12 +328,6 @@ static inline int do_exception(struct pt_regs *regs, int access, goto out; address = trans_exc_code & __FAIL_ADDR_MASK; - /* - * When we get here, the fault happened in the current - * task's user address space, so we can switch on the - * interrupts again and then search the VMAs - */ - local_irq_enable(); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); @@ -348,8 +358,10 @@ static inline int do_exception(struct pt_regs *regs, int access, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, - (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0); + write = (access == VM_WRITE || + (trans_exc_code & store_indication) == 0x400) ? + FAULT_FLAG_WRITE : 0; + fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; @@ -374,20 +386,20 @@ out: return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) +void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { - unsigned long trans_exc_code = S390_lowcore.trans_exc_code; int fault; /* Protection exception is supressing, decrement psw address. */ - regs->psw.addr -= (int_code >> 16); + regs->psw.addr -= (pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. 
*/ if (unlikely(!(trans_exc_code & 4))) { - do_low_address(regs, int_code, trans_exc_code); + do_low_address(regs, pgm_int_code, trans_exc_code); return; } fault = do_exception(regs, VM_WRITE, trans_exc_code); @@ -395,9 +407,9 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) do_fault_error(regs, 4, trans_exc_code, fault); } -void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) +void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { - unsigned long trans_exc_code = S390_lowcore.trans_exc_code; int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; @@ -408,21 +420,19 @@ void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) #endif fault = do_exception(regs, access, trans_exc_code); if (unlikely(fault)) - do_fault_error(regs, int_code & 255, trans_exc_code, fault); + do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); } #ifdef CONFIG_64BIT -void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) +void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { - unsigned long trans_exc_code = S390_lowcore.trans_exc_code; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; - local_irq_enable(); - down_read(&mm->mmap_sem); vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); up_read(&mm->mmap_sem); @@ -434,16 +444,16 @@ void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_MASK_PSTATE) { - do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); + do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code); return; } no_context: - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs, pgm_int_code, trans_exc_code); } #endif -int __handle_fault(unsigned long uaddr, unsigned long int_code, 
int write_user) +int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) { struct pt_regs regs; int access, fault; @@ -454,14 +464,14 @@ int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) regs.psw.addr = (unsigned long) __builtin_return_address(0); regs.psw.addr |= PSW_ADDR_AMODE; uaddr &= PAGE_MASK; - access = write_user ? VM_WRITE : VM_READ; + access = write ? VM_WRITE : VM_READ; fault = do_exception(&regs, access, uaddr | 2); if (unlikely(fault)) { if (fault & VM_FAULT_OOM) { pagefault_out_of_memory(); fault = 0; } else if (fault & VM_FAULT_SIGBUS) - do_sigbus(&regs, int_code, uaddr); + do_sigbus(&regs, pgm_int_code, uaddr); } return fault ? -EFAULT : 0; } @@ -527,7 +537,8 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } -static void pfault_interrupt(__u16 int_code) +static void pfault_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct task_struct *tsk; __u16 subcode; @@ -538,14 +549,18 @@ static void pfault_interrupt(__u16 int_code) * in the 'cpu address' field associated with the * external interrupt. */ - subcode = S390_lowcore.cpu_addr; + subcode = ext_int_code >> 16; if ((subcode & 0xff00) != __SUBCODE_MASK) return; /* * Get the token (= address of the task structure of the affected task). */ - tsk = *(struct task_struct **) __LC_PFAULT_INTPARM; +#ifdef CONFIG_64BIT + tsk = *(struct task_struct **) param64; +#else + tsk = *(struct task_struct **) param32; +#endif if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c new file mode 100644 index 00000000000..38e641cdd97 --- /dev/null +++ b/arch/s390/mm/gup.c @@ -0,0 +1,225 @@ +/* + * Lockless get_user_pages_fast for s390 + * + * Copyright IBM Corp.
2010 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/vmstat.h> +#include <linux/pagemap.h> +#include <linux/rwsem.h> +#include <asm/pgtable.h> + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep, pte; + struct page *page; + + result = write ? 0 : _PAGE_RO; + mask = result | _PAGE_INVALID | _PAGE_SPECIAL; + + ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); + do { + pte = *ptep; + barrier(); + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + struct page *head, *page; + int refs; + + result = write ? 
0 : _SEGMENT_ENTRY_RO; + mask = result | _SEGMENT_ENTRY_INV; + if ((pmd_val(pmd) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + } + + return 1; +} + + +static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp, pmd; + + pmdp = (pmd_t *) pudp; +#ifdef CONFIG_64BIT + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmdp = (pmd_t *) pud_deref(pud); + pmdp += pmd_index(addr); +#endif + do { + pmd = *pmdp; + barrier(); + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (unlikely(pmd_huge(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp, pud; + + pudp = (pud_t *) pgdp; +#ifdef CONFIG_64BIT + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pudp = (pud_t *) pgd_deref(pgd); + pudp += pud_index(addr); +#endif + do { + pud = *pudp; + barrier(); + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/** + * get_user_pages_fast() - pin 
user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp, pgd; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + goto slow_irqon; + + /* + * local_irq_disable() doesn't prevent pagetable teardown, but does + * prevent the pagetables from being freed on s390. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. 
+ */ + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd = *pgdp; + barrier(); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; +slow: + local_irq_enable(); +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f28c43d2f61..639cd21f221 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -68,7 +68,7 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; - pte_free(&init_mm, ptep); + page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 30eb6d02ddb..bb409332a48 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -38,19 +38,59 @@ #include <asm/tlbflush.h> #include <asm/sections.h> -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); -char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); +static unsigned long setup_zero_pages(void) +{ + struct cpuid cpu_id; + unsigned int order; + unsigned long size; + struct page *page; + int i; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: /* g5 */ + case 0x2064: /* z900 */ + case 0x2066: /* z900 */ + case 
0x2084: /* z990 */ + case 0x2086: /* z990 */ + case 0x2094: /* z9-109 */ + case 0x2096: /* z9-109 */ + order = 0; + break; + case 0x2097: /* z10 */ + case 0x2098: /* z10 */ + default: + order = 2; + break; + } + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Out of memory in setup_zero_pages"); + + page = virt_to_page((void *) empty_zero_page); + split_page(page, order); + for (i = 1 << order; i > 0; i--) { + SetPageReserved(page); + page++; + } + + size = PAGE_SIZE << order; + zero_page_mask = (size - 1) & PAGE_MASK; + + return 1UL << order; +} + /* * paging_init() sets up the page tables */ void __init paging_init(void) { - static const int ssm_mask = 0x04000000L; unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long pgd_type; @@ -72,7 +112,7 @@ void __init paging_init(void) __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); - __raw_local_irq_ssm(ssm_mask); + arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); atomic_set(&init_mm.context.attach_count, 1); @@ -84,6 +124,7 @@ void __init paging_init(void) #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); + fault_init(); } void __init mem_init(void) @@ -93,14 +134,12 @@ void __init mem_init(void) max_mapnr = num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); - /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); + totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. 
*/ reservedpages = 0; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index a8c2af8c650..71a4b0d34be 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -71,7 +71,7 @@ int memcpy_real(void *dest, void *src, size_t count) if (!count) return 0; - flags = __raw_local_irq_stnsm(0xf8UL); + flags = __arch_local_irq_stnsm(0xf8UL); asm volatile ( "0: mvcle %1,%2,0x0\n" "1: jo 0b\n" @@ -82,6 +82,6 @@ int memcpy_real(void *dest, void *src, size_t count) "+d" (_len2), "=m" (*((long *) dest)) : "m" (*((long *) src)) : "cc", "memory"); - __raw_local_irq_ssm(flags); + arch_local_irq_restore(flags); return rc; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 8d999249d35..0c719c61972 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/quicklist.h> +#include <linux/rcupdate.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -23,6 +24,67 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> +struct rcu_table_freelist { + struct rcu_head rcu; + struct mm_struct *mm; + unsigned int pgt_index; + unsigned int crst_index; + unsigned long *table[0]; +}; + +#define RCU_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ + / sizeof(unsigned long)) + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); + +static void __page_table_free(struct mm_struct *mm, unsigned long *table); +static void __crst_table_free(struct mm_struct *mm, unsigned long *table); + +static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) +{ + struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist *batch = *batchp; + + if (batch) + return batch; + batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); + if (batch) { + batch->mm = mm; + batch->pgt_index = 0; + batch->crst_index = RCU_FREELIST_SIZE; + 
*batchp = batch; + } + return batch; +} + +static void rcu_table_freelist_callback(struct rcu_head *head) +{ + struct rcu_table_freelist *batch = + container_of(head, struct rcu_table_freelist, rcu); + + while (batch->pgt_index > 0) + __page_table_free(batch->mm, batch->table[--batch->pgt_index]); + while (batch->crst_index < RCU_FREELIST_SIZE) + __crst_table_free(batch->mm, batch->table[batch->crst_index++]); + free_page((unsigned long) batch); +} + +void rcu_table_freelist_finish(void) +{ + struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); + + if (!batch) + return; + call_rcu(&batch->rcu, rcu_table_freelist_callback); + __get_cpu_var(rcu_table_freelist) = NULL; +} + +static void smp_sync(void *arg) +{ +} + #ifndef CONFIG_64BIT #define ALLOC_ORDER 1 #define TABLES_PER_PAGE 4 @@ -78,25 +140,55 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) } page->index = page_to_phys(shadow); } - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.crst_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } -void crst_table_free(struct mm_struct *mm, unsigned long *table) +static void __crst_table_free(struct mm_struct *mm, unsigned long *table) { unsigned long *shadow = get_shadow_table(table); - struct page *page = virt_to_page(table); - spin_lock(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock(&mm->context.list_lock); if (shadow) free_pages((unsigned long) shadow, ALLOC_ORDER); free_pages((unsigned long) table, ALLOC_ORDER); } +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page = virt_to_page(table); + + spin_lock_bh(&mm->context.list_lock); + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + __crst_table_free(mm, table); +} + +void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + 
struct page *page = virt_to_page(table); + + spin_lock_bh(&mm->context.list_lock); + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + __crst_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + __crst_table_free(mm, table); + return; + } + batch->table[--batch->crst_index] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); +} + #ifdef CONFIG_64BIT int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) { @@ -108,7 +200,7 @@ repeat: table = crst_table_alloc(mm, mm->context.noexec); if (!table) return -ENOMEM; - spin_lock(&mm->page_table_lock); + spin_lock_bh(&mm->page_table_lock); if (mm->context.asce_limit < limit) { pgd = (unsigned long *) mm->pgd; if (mm->context.asce_limit <= (1UL << 31)) { @@ -130,7 +222,7 @@ repeat: mm->task_size = mm->context.asce_limit; table = NULL; } - spin_unlock(&mm->page_table_lock); + spin_unlock_bh(&mm->page_table_lock); if (table) crst_table_free(mm, table); if (mm->context.asce_limit < limit) @@ -182,7 +274,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long bits; bits = (mm->context.noexec || mm->context.has_pgste) ? 
3UL : 1UL; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, @@ -191,7 +283,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page = NULL; } if (!page) { - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -202,7 +294,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } table = (unsigned long *) page_to_phys(page); @@ -213,10 +305,25 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page->flags |= bits; if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) list_move_tail(&page->lru, &mm->context.pgtable_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return table; } +static void __page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned long bits; + + bits = ((unsigned long) table) & 15; + table = (unsigned long *)(((unsigned long) table) ^ bits); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page->flags ^= bits; + if (!(page->flags & FRAG_MASK)) { + pgtable_page_dtor(page); + __free_page(page); + } +} + void page_table_free(struct mm_struct *mm, unsigned long *table) { struct page *page; @@ -225,7 +332,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) bits = (mm->context.noexec || mm->context.has_pgste) ? 
3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page->flags ^= bits; if (page->flags & FRAG_MASK) { /* Page now has some free pgtable fragments. */ @@ -234,18 +341,48 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } else /* All fragments of the 4K page have been freed. */ list_del(&page->lru); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); if (page) { pgtable_page_dtor(page); __free_page(page); } } +void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + struct page *page; + unsigned long bits; + + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + page_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + page_table_free(mm, table); + return; + } + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + spin_lock_bh(&mm->context.list_lock); + /* Delayed freeing with rcu prevents reuse of pgtable fragments */ + list_del_init(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *)(((unsigned long) table) | bits); + batch->table[batch->pgt_index++] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); +} + void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) { struct page *page; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); /* Free shadow region and segment tables. 
*/ list_for_each_entry(page, &mm->context.crst_list, lru) if (page->index) { @@ -255,7 +392,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) /* "Free" second halves of page tables. */ list_for_each_entry(page, &mm->context.pgtable_list, lru) page->flags &= ~SECOND_HALVES; - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); mm->context.noexec = 0; update_mm(mm, tsk); } @@ -312,6 +449,8 @@ int s390_enable_sie(void) tsk->mm = tsk->active_mm = mm; preempt_disable(); update_mm(mm, tsk); + atomic_inc(&mm->context.attach_count); + atomic_dec(&old_mm->context.attach_count); cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); preempt_enable(); task_unlock(tsk); |