diff options
Diffstat (limited to 'arch/s390')
77 files changed, 2134 insertions, 457 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ed5cb5af528..a9fbd43395f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -91,6 +91,7 @@ config S390 select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select HAVE_RCU_TABLE_FREE if SMP + select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -568,6 +569,16 @@ config KEXEC current kernel, and to start another kernel. It is like a reboot but is independent of hardware/microcode support. +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + For more details see Documentation/kdump/kdump.txt + config ZFCPDUMP def_bool n prompt "zfcpdump support" diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 028f23ea81d..465eca756fe 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -61,7 +61,7 @@ static unsigned long free_mem_end_ptr; extern _sclp_print_early(const char *); -int puts(const char *s) +static int puts(const char *s) { _sclp_print_early(s); return 0; diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 29c82c640a8..6cf8e26b313 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -68,7 +68,7 @@ CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 6023c6dc1fb..74c8f5e76ce 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -562,10 +562,9 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) void *base; buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr); - base = vmalloc(buf_size); + base = vzalloc(buf_size); if (!base) return -ENOMEM; - memset(base, 0, buf_size); d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); rc = diag204_do_store(d204->buf, diag204_buf_pages); if (rc) { diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6fe874fc5f8..481f4f76f66 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -108,9 +108,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_gid = hypfs_info->gid; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; if (mode & S_IFDIR) - ret->i_nlink = 2; - else - ret->i_nlink = 1; + set_nlink(ret, 2); } return ret; } @@ -361,7 +359,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, } else if (mode & S_IFDIR) { inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - parent->d_inode->i_nlink++; + inc_nlink(parent->d_inode); } else BUG(); inode->i_private = data; diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 623f2fb7177..9381c92cc77 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -11,6 +11,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> #include <asm/fcx.h> +#include <asm/irq.h> /* structs from asm/cio.h */ struct irb; @@ -127,6 +128,7 @@ enum uc_todo { * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure + * @int_class: interruption class to use for accounting interrupts */ struct ccw_driver { struct ccw_device_id *ids; @@ -144,6 +146,7 @@ struct ccw_driver { int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; + enum interruption_class int_class; }; extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index da359ca6fe5..2e49748b27d 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -12,6 +12,7 @@ #define PSW32_MASK_IO 0x02000000UL #define PSW32_MASK_EXT 0x01000000UL #define PSW32_MASK_KEY 0x00F00000UL +#define PSW32_MASK_BASE 0x00080000UL /* Always one */ #define PSW32_MASK_MCHECK 0x00040000UL #define PSW32_MASK_WAIT 0x00020000UL #define PSW32_MASK_PSTATE 0x00010000UL @@ -19,21 +20,19 @@ #define PSW32_MASK_CC 0x00003000UL #define PSW32_MASK_PM 0x00000f00UL -#define PSW32_ADDR_AMODE31 0x80000000UL +#define PSW32_MASK_USER 0x00003F00UL + +#define PSW32_ADDR_AMODE 0x80000000UL #define PSW32_ADDR_INSN 0x7FFFFFFFUL -#define PSW32_BASE_BITS 0x00080000UL +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) #define PSW32_ASC_PRIMARY 0x00000000UL #define PSW32_ASC_ACCREG 0x00004000UL #define PSW32_ASC_SECONDARY 0x00008000UL #define PSW32_ASC_HOME 0x0000C000UL -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - -extern long psw32_user_bits; +extern u32 psw32_user_bits; #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" @@ -131,7 +130,8 @@ struct compat_statfs { compat_fsid_t f_fsid; s32 f_namelen; s32 f_frsize; - s32 f_spare[6]; + s32 f_flags; + s32 f_spare[5]; }; #define COMPAT_RLIM_OLD_INFINITY 0x7fffffff diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 64b61bf72e9..547f1a6a35d 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -188,7 +188,8 @@ extern char elf_platform[]; #define SET_PERSONALITY(ex) \ do { \ if (personality(current->personality) != PER_LINUX32) \ - set_personality(PER_LINUX); \ + set_personality(PER_LINUX | \ + (current->personality & ~PER_MASK)); \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ set_thread_flag(TIF_31BIT); \ else \ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 97cc4403fab..6940abfbe1d 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -168,5 +168,6 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); +extern void store_status(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ba7b01c726a..ba6d85f88d5 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -8,7 +8,8 @@ enum interruption_class { EXTERNAL_INTERRUPT, IO_INTERRUPT, EXTINT_CLK, - EXTINT_IPI, + EXTINT_EXC, + EXTINT_EMS, EXTINT_TMR, EXTINT_TLA, EXTINT_PFL, @@ -17,8 +18,8 @@ enum interruption_class { EXTINT_SCP, EXTINT_IUC, EXTINT_CPM, + IOINT_CIO, IOINT_QAI, - IOINT_QDI, IOINT_DAS, IOINT_C15, IOINT_C70, @@ -28,6 +29,7 @@ enum interruption_class { IOINT_CLW, IOINT_CTC, IOINT_APB, + IOINT_CSC, NMI_NMI, NR_IRQS, }; diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index bb729b84a21..cf4e47b0948 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -30,9 +30,15 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 00ff00dfb24..24e18473d92 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -119,6 +119,7 @@ struct kvm_vcpu_stat { u32 instruction_lctlg; u32 exit_program_interruption; u32 exit_instr_and_program; + u32 deliver_external_call; u32 deliver_emergency_signal; u32 deliver_service_signal; u32 deliver_virtio_interrupt; @@ -138,11 +139,13 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 instruction_sigp_sense; + u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; u32 instruction_sigp_stop; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 diagnose_10; u32 diagnose_44; }; @@ -174,6 +177,10 @@ struct kvm_s390_prefix_info { __u32 address; }; +struct kvm_s390_extcall_info { + __u16 code; +}; + struct kvm_s390_emerg_info { __u16 code; }; @@ -186,6 +193,7 @@ struct kvm_s390_interrupt_info { struct kvm_s390_ext_info ext; struct kvm_s390_pgm_info pgm; struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; struct kvm_s390_prefix_info prefix; }; }; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index e85c911aabf..9e13c7d56cc 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -151,10 +151,8 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e08 */ - __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */ + __u32 vmcore_info; /* 0x0e08 */ + __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -290,9 +288,7 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e0c */ + __u64 vmcore_info; /* 0x0e0c */ __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index accb372ddc7..f7ec548c2b9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,6 +177,7 @@ static inline int page_test_and_clear_young(unsigned long pfn) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 519eb5f187e..34ede0ea85a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -658,12 +658,14 @@ static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) * struct gmap_struct - guest address space * @mm: pointer to the parent mm_struct * @table: pointer to the page directory + * @asce: address space control element for gmap page table * @crst_list: list of all crst tables used in the guest address space */ struct gmap { struct list_head list; struct mm_struct *mm; unsigned long *table; + unsigned long asce; struct list_head crst_list; }; @@ -694,7 +696,9 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long length); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index a4b6229e5d4..5f33d37d032 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -33,6 +33,8 @@ static inline void get_cpu_id(struct cpuid *ptr) extern void s390_adjust_jiffies(void); extern int get_cpu_capability(unsigned int *); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. @@ -118,17 +120,17 @@ struct stack_frame { /* * Do necessary setup to start up a new thread. */ -#define start_thread(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ } while (0) -#define start_thread31(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user32_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ } while (0) /* Forward declaration, a strange C thing */ @@ -187,7 +189,6 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ - static inline void __load_psw_mask (unsigned long mask) { unsigned long addr; @@ -212,26 +213,37 @@ static inline void __load_psw_mask (unsigned long mask) : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); #endif /* __s390x__ */ } - + /* - * Function to stop a processor until an interruption occurred + * Rewind PSW instruction address by specified number of bytes. */ -static inline void enabled_wait(void) +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) { - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | - PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY); -} +#ifndef __s390x__ + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} + /* * Function to drop a processor into disabled wait state */ - static inline void ATTRIB_NORET disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; - dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; dw_psw.addr = code; /* * Store status and then load disabled wait psw, diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62fd80c9e98..a65846340d5 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -230,17 +230,21 @@ typedef struct #define PSW_MASK_IO 0x02000000UL #define PSW_MASK_EXT 0x01000000UL #define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ #define PSW_MASK_MCHECK 0x00040000UL #define PSW_MASK_WAIT 0x00020000UL #define PSW_MASK_PSTATE 0x00010000UL #define PSW_MASK_ASC 0x0000C000UL #define PSW_MASK_CC 0x00003000UL #define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + +#define PSW_MASK_USER 0x00003F00UL #define PSW_ADDR_AMODE 0x80000000UL #define PSW_ADDR_INSN 0x7FFFFFFFUL -#define PSW_BASE_BITS 0x00080000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) #define PSW_ASC_PRIMARY 0x00000000UL @@ -254,6 +258,7 @@ typedef struct #define PSW_MASK_DAT 0x0400000000000000UL #define PSW_MASK_IO 0x0200000000000000UL #define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL #define PSW_MASK_KEY 0x00F0000000000000UL #define PSW_MASK_MCHECK 0x0004000000000000UL #define PSW_MASK_WAIT 0x0002000000000000UL @@ -261,12 +266,14 @@ typedef struct #define PSW_MASK_ASC 0x0000C00000000000UL #define PSW_MASK_CC 0x0000300000000000UL #define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x00003F0180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL -#define PSW_BASE_BITS 0x0000000180000000UL -#define PSW_BASE32_BITS 0x0000000080000000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) #define PSW_ASC_PRIMARY 0x0000000000000000UL @@ -279,18 +286,7 @@ typedef struct #ifdef __KERNEL__ extern long psw_kernel_bits; extern long psw_user_bits; -#ifdef CONFIG_64BIT -extern long psw_user32_bits; #endif -#endif - -/* This macro merges a NEW PSW mask specified by the user into - the currently active PSW mask CURRENT, modifying only those - bits in CURRENT that the user may be allowed to change: this - is the condition code and the program mask bits. */ -#define PSW_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW_MASK_CC|PSW_MASK_PM)) | \ - ((NEW) & (PSW_MASK_CC|PSW_MASK_PM))) /* * The s390_regs structure is used to define the elf_gregset_t. @@ -328,8 +324,7 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short ilc; - unsigned short svcnr; + unsigned int svc_code; }; /* @@ -487,6 +482,8 @@ typedef struct #define PTRACE_POKETEXT_AREA 0x5004 #define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 15c97625df8..e63d13dd3bf 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -46,6 +46,8 @@ struct qdesfmt0 { u32 : 16; } __attribute__ ((packed)); +#define QDR_AC_MULTI_BUFFER_ENABLE 0x01 + /** * struct qdr - queue description record (QDR) * @qfmt: queue format @@ -123,6 +125,40 @@ struct slibe { }; /** + * struct qaob - queue asynchronous operation block + * @res0: reserved parameters + * @res1: reserved parameter + * @res2: reserved parameter + * @res3: reserved parameter + * @aorc: asynchronous operation return code + * @flags: internal flags + * @cbtbs: control block type + * @sb_count: number of storage blocks + * @sba: storage block element addresses + * @dcount: size of storage block elements + * @user0: user defineable value + * @res4: reserved paramater + * @user1: user defineable value + * @user2: user defineable value + */ +struct qaob { + u64 res0[6]; + u8 res1; + u8 res2; + u8 res3; + u8 aorc; + u8 flags; + u16 cbtbs; + u8 sb_count; + u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u64 user0; + u64 res4[2]; + u64 user1; + u64 user2; +} __attribute__ ((packed, aligned(256))); + +/** * struct slib - storage list information block (SLIB) * @nsliba: next SLIB address (if any) * @sla: SL address @@ -222,9 +258,46 @@ struct slsb { u8 val[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(256))); +#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 +#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 +/** + * struct qdio_outbuf_state - SBAL related asynchronous operation information + * (for communication with upper layer programs) + * (only required for use with completion queues) + * @flags: flags indicating state of buffer + * @aob: pointer to QAOB used for the particular SBAL + * @user: pointer to upper layer program's state information related to SBAL + * (stored in user1 data of QAOB) + */ +struct qdio_outbuf_state { + u8 flags; + struct qaob *aob; + void *user; +}; + +#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 +#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 + +#define CHSC_AC1_INITIATE_INPUTQ 0x80 + + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ + +#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_DATA_DIV_ENABLED 0x0002 + +#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 + struct qdio_ssqd_desc { u8 flags; u8:8; @@ -243,8 +316,7 @@ struct qdio_ssqd_desc { u64 sch_token; u8 mro; u8 mri; - u8:8; - u8 sbalic; + u16 qdioac3; u16:16; u8:8; u8 mmwc; @@ -280,13 +352,16 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues + * @queue_start_poll: polling handlers (one per input queue or NULL) * @int_parm: interruption parameter * @input_sbal_addr_array: address of no_input_qs * 128 pointers * @output_sbal_addr_array: address of no_output_qs * 128 pointers + * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) */ struct qdio_initialize { struct ccw_device *cdev; unsigned char q_format; + unsigned char qdr_ac; unsigned char adapter_name[8]; unsigned int qib_param_field_format; unsigned char *qib_param_field; @@ -297,11 +372,12 @@ struct qdio_initialize { unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; - void (*queue_start_poll) (struct ccw_device *, int, unsigned long); + void (**queue_start_poll) (struct ccw_device *, int, unsigned long); int scan_threshold; unsigned long int_parm; void **input_sbal_addr_array; void **output_sbal_addr_array; + struct qdio_outbuf_state *output_sbal_state_array; }; #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ @@ -316,6 +392,7 @@ struct qdio_initialize { extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); +extern void qdio_release_aob(struct qaob *); extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, unsigned int); extern int qdio_start_irq(struct ccw_device *, int); diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index f584f4a5258..3d6ad4ad2a3 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -17,5 +17,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void); +extern void s390_reset_system(void (*func)(void *), void *data); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index d5e2ef10537..5a099714df0 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -26,15 +26,21 @@ #define IPL_DEVICE (*(unsigned long *) (0x10404)) #define INITRD_START (*(unsigned long *) (0x1040C)) #define INITRD_SIZE (*(unsigned long *) (0x10414)) +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) #else /* __s390x__ */ #define IPL_DEVICE (*(unsigned long *) (0x10400)) #define INITRD_START (*(unsigned long *) (0x10408)) #define INITRD_SIZE (*(unsigned long *) (0x10410)) +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) #endif /* __s390x__ */ #define COMMAND_LINE ((char *) (0x10480)) #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 +#define CHUNK_OLDMEM 4 +#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -48,6 +54,8 @@ extern int memory_end_set; extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); +void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, + unsigned long size, int type); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 @@ -106,6 +114,7 @@ extern unsigned int user_mode; #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) +#define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) /* * Console mode. Override with conmode= @@ -134,10 +143,14 @@ extern char kernel_nss_name[]; #define IPL_DEVICE 0x10404 #define INITRD_START 0x1040C #define INITRD_SIZE 0x10414 +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 #else /* __s390x__ */ #define IPL_DEVICE 0x10400 #define INITRD_START 0x10408 #define INITRD_SIZE 0x10410 +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 #endif /* __s390x__ */ #define COMMAND_LINE 0x10480 diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h index 0addc6466d9..ca3f8814e36 100644 --- a/arch/s390/include/asm/sfp-util.h +++ b/arch/s390/include/asm/sfp-util.h @@ -72,6 +72,6 @@ extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, #define UDIV_NEEDS_NORMALIZATION 0 -#define abort() return 0 +#define abort() BUG() #define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 045e009fc16..ab47a69fdf0 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -33,6 +33,7 @@ extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, int from, int to); +extern void smp_restart_with_online_cpu(void); extern void smp_restart_cpu(void); /* @@ -64,6 +65,10 @@ static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) func(data); } +static inline void smp_restart_with_online_cpu(void) +{ +} + #define smp_vcpu_scheduled (1) #endif /* CONFIG_SMP */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 56612fc8186..fd94dfec8d0 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include <linux/smp.h> +extern int spin_retry; + static inline int _raw_compare_and_swap(volatile unsigned int *lock, unsigned int old, unsigned int new) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 5c0246b955d..b239ff53b18 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -13,6 +13,7 @@ #define _ASM_SYSCALL_H 1 #include <linux/sched.h> +#include <linux/err.h> #include <asm/ptrace.h> /* @@ -25,7 +26,8 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svcnr ? regs->svcnr : -1; + return test_tsk_thread_flag(task, TIF_SYSCALL) ? + (regs->svc_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -37,7 +39,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; + return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 6582f69f238..ef573c1d71a 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -20,6 +20,8 @@ struct task_struct; +extern int sysctl_userprocess_debug; + extern struct task_struct *__switch_to(void *, void *); extern void update_per_regs(struct task_struct *task); @@ -114,6 +116,8 @@ extern void pfault_fini(void); extern void cmma_init(void); extern int memcpy_real(void *, void *, size_t); extern void copy_to_absolute_zero(void *dest, void *src, size_t count); +extern int copy_to_user_real(void __user *dest, void *src, size_t count); +extern int copy_from_user_real(void *dest, void __user *src, size_t count); #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ @@ -210,8 +214,10 @@ __set_psw_mask(unsigned long mask) __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); } -#define local_mcck_enable() __set_psw_mask(psw_kernel_bits) -#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK) +#define local_mcck_enable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) #ifdef CONFIG_SMP diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 1a5dbb6f149..a23183423b1 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + unsigned int system_call; __u64 user_timer; __u64 system_timer; unsigned long last_break; /* last breaking-event-address. */ @@ -84,10 +85,10 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ +#define TIF_SYSCALL 0 /* inside a system call */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_RESTART_SVC 4 /* restart svc with new svc number */ #define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ @@ -103,11 +104,11 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLE_STEP 20 /* This task is single stepped */ #define TIF_FREEZE 21 /* thread is freezing for suspend */ +#define _TIF_SYSCALL (1<<TIF_SYSCALL) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_RESTART_SVC (1<<TIF_RESTART_SVC) #define _TIF_PER_TRAP (1<<TIF_PER_TRAP) #define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING) #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -117,7 +118,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIE (1<<TIF_SIE) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) -#define _TIF_SINGLE_STEP (1<<TIF_FREEZE) +#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) #define _TIF_FREEZE (1<<TIF_FREEZE) #ifdef CONFIG_64BIT diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a40af6..d610bef9c5e 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -86,6 +86,17 @@ static inline void get_clock_ext(char *clk) asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); } +static inline unsigned long long get_clock_fast(void) +{ + unsigned long long clk; + + if (test_facility(25)) + asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); + else + clk = get_clock(); + return clk; +} + static inline unsigned long long get_clock_xt(void) { unsigned char clk[16]; diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 30444538238..1d8648cf2fe 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -59,6 +59,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) } #else #define __tlb_flush_full(mm) __tlb_flush_local() +#define __tlb_flush_global() __tlb_flush_local() #endif /* diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index df3732249ba..dd4f0764091 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 532fd432215..751318765e2 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -10,6 +10,7 @@ #include <linux/sched.h> #include <asm/vdso.h> #include <asm/sigp.h> +#include <asm/pgtable.h> /* * Make sure that the compiler is new enough. We want a compiler that @@ -44,8 +45,7 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); - DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_SVC_CODE, offsetof(struct pt_regs, svc_code)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); @@ -126,6 +126,7 @@ int main(void) DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); + DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); @@ -139,7 +140,6 @@ int main(void) DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); - DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64)); #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ @@ -151,6 +151,7 @@ int main(void) DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); + DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); #endif /* CONFIG_32BIT */ return 0; } diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 255435663bf..f8828d38fa6 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -86,6 +86,8 @@ s390_base_pgm_handler_fn: ENTRY(diag308_reset) larl %r4,.Lctlregs # Save control registers stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 lghi %r3,0 lg %r4,0(%r4) # Save PSW @@ -99,6 +101,8 @@ ENTRY(diag308_reset) sam64 # Switch to 64 bit addressing mode larl %r4,.Lctlregs # Restore control registers lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) br %r14 .align 16 .Lrestart_psw: @@ -110,6 +114,8 @@ ENTRY(diag308_reset) .rept 16 .quad 0 .endr +.Lfpctl: + .long 0 .previous #else /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa86dd9..84a98289844 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -60,12 +60,9 @@ #include "compat_linux.h" -long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); -long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE); +u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE | PSW32_ASC_HOME; /* For this source file, we want overflow handling. */ @@ -365,12 +362,7 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, if (set) { if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } + s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } set_fs (KERNEL_DS); ret = sys_rt_sigprocmask(how, @@ -380,12 +372,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, set_fs (old_fs); if (ret) return ret; if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } @@ -404,12 +392,8 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); set_fs (old_fs); if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a9a285b8c4a..4f68c81d3ff 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -141,7 +141,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -213,16 +214,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = get_user(sa_handler, &act->sa_handler); ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } + new_ka.sa.sa_mask.sig[0] = + set32.sig[0] | (((long)set32.sig[1]) << 32); ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) @@ -233,20 +226,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); @@ -300,9 +281,10 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) _s390_regs_common32 regs32; int err, i; - regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; + regs32.psw.mask = psw32_user_bits | + ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); + regs32.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) regs32.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); @@ -327,8 +309,9 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; @@ -342,7 +325,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) return err; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -496,11 +479,11 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -509,11 +492,12 @@ static int setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ @@ -521,7 +505,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, regs->gprs[5] = current->thread.prot_addr; /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; @@ -564,20 +548,21 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, } else { regs->gprs[14] = (__u64) frame->retcode; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + (u16 __force __user *)(frame->retcode)); } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; return 0; give_sigsegv: diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 7526db6bf50..5006a1d9f5d 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1623,8 +1623,7 @@ ENTRY(sys_syncfs_wrapper) lgfr %r2,%r2 # int jg sys_syncfs - .globl sys_setns_wrapper -sys_setns_wrapper: +ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 00000000000..39f8fd4438f --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,426 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <asm/ipl.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +/* + * Copy one page from "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + + src = (pfn << PAGE_SHIFT) + offset; + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + copy_to_user_real((void __force __user *) buf, (void *) src, + csize); + else + memcpy_real(buf, (void *) src, csize); + return csize; +} + +/* + * Copy memory from old kernel + */ +static int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + return memcpy_real(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Get memory layout and create hole for oldmem + */ +static struct mem_chunk *get_memory_layout(void) +{ + struct mem_chunk *chunk_array; + + chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); + detect_memory_layout(chunk_array); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + return chunk_array; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return ptr; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return ptr; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return ptr; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return ptr; + vmcoreinfo = kzalloc_panic(note.n_descsz + 1); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return ptr; + vmcoreinfo[note.n_descsz + 1] = 0; + return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; zfcpdump_save_areas[i]; i++) { + if (zfcpdump_save_areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i, cnt = 0; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + if (mem_chunk->size == 0) + continue; + cnt++; + } + kfree(chunk_array); + return cnt; +} + +/* + * Relocate pointer in order to allow vmcore code access the data + */ +static inline unsigned long relocate(unsigned long addr) +{ + return OLDMEM_BASE + addr; +} + +/* + * Initialize ELF loads (new kernel) + */ +static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (mem_chunk->size == 0) + break; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + else + phdr->p_filesz = mem_chunk->size; + phdr->p_type = PT_LOAD; + phdr->p_offset = mem_chunk->addr; + phdr->p_vaddr = mem_chunk->addr; + phdr->p_paddr = mem_chunk->addr; + phdr->p_memsz = mem_chunk->size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } + kfree(chunk_array); + return i; +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; zfcpdump_save_areas[i]; i++) { + sa = zfcpdump_save_areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = relocate(notes_offset); + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); + *elfcorebuf_sz = hdr_off; + *elfcorebuf = (void *) relocate((unsigned long) hdr); + BUG_ON(*elfcorebuf_sz > alloc_size); +} + +/* + * Create kdump ELF core header in new kernel, if it has not been passed via + * the "elfcorehdr" kernel parameter + */ +static int setup_kdump_elfcorehdr(void) +{ + size_t elfcorebuf_sz; + char *elfcorebuf; + + if (!OLDMEM_BASE || is_kdump_kernel()) + return -EINVAL; + s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); + elfcorehdr_addr = (unsigned long long) elfcorebuf; + elfcorehdr_size = elfcorebuf_sz; + return 0; +} + +subsys_initcall(setup_kdump_elfcorehdr); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f297456dba7..37394b3413e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,7 +252,7 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 02ec8fe7d03..b13157057e0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -43,16 +43,15 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -228,9 +227,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+3(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -239,17 +239,17 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER sysc_do_svc: xr %r7,%r7 - icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load svc number and test for svc 0 bnz BASED(sysc_nr_ok) # svc number > 0 # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) bnl BASED(sysc_nr_ok) - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: sll %r7,2 # svc number *4 l %r10,BASED(.Lsysc_table) - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r8,0(%r7,%r10) # get system call addr. bnz BASED(sysc_tracesys) @@ -259,23 +259,19 @@ sysc_nr_ok: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + bno BASED(sysc_restore) tm __TI_flags+3(%r12),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) + ni __TI_flags+3(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) - -# # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r12),_TIF_NEED_RESCHED @@ -284,8 +280,6 @@ sysc_work_tif: bo BASED(sysc_sigpending) tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME bo BASED(sysc_notify_resume) - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) tm __TI_flags+3(%r12),_TIF_PER_TRAP bo BASED(sysc_singlestep) b BASED(sysc_return) # beware of critical section cleanup @@ -314,11 +308,14 @@ sysc_sigpending: la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) + tm __TI_flags+3(%r12),_TIF_SYSCALL + bno BASED(sysc_return) + lm %r2,%r6,SP_R2(%r15) # load svc arguments + xr %r7,%r7 # svc 0 returns -ENOSYS + clc SP_SVC_CODE+2(2,%r15),BASED(.Lnr_syscalls+2) + bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number + b BASED(sysc_nr_ok) # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -329,24 +326,11 @@ sysc_notify_resume: la %r14,BASED(sysc_return) br %r1 # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument - lm %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - b BASED(sysc_nr_ok) # restart svc - # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + ni __TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return @@ -361,7 +345,7 @@ sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVCNR(%r15) + icm %r0,3,SP_SVC_CODE(%r15) st %r0,SP_R2(%r15) basr %r14,%r1 cl %r2,BASED(.Lnr_syscalls) @@ -376,7 +360,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 bz BASED(sysc_return) l %r1,BASED(.Ltrace_exit) la %r2,SP_PTREGS(%r15) # load pt_regs @@ -454,7 +438,6 @@ ENTRY(pgm_check_handler) bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -530,9 +513,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+3(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -540,7 +524,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) @@ -550,7 +533,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -853,13 +835,13 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - st %r15,__LC_SAVE_AREA_64(%r0) # save r15 + st %r15,__LC_SAVE_AREA+48(%r0) # save r15 basr %r15,0 0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack l %r15,0(%r15) ahi %r15,-SP_SIZE # make room for pt_regs stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(4,%r15),__LC_SAVE_AREA+48(%r0)# store saved %r15 to stack mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 basr %r14,0 @@ -965,9 +947,11 @@ cleanup_system_call: s %r15,BASED(.Lc_spsize) # make room for registers & psw st %r15,12(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC mvc 0(4,%r12),__LC_THREAD_INFO + l %r12,__LC_THREAD_INFO + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+3(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) bhe BASED(cleanup_stime) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 66729eb7bbc..ef8fb1d6e8d 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,24 +5,33 @@ #include <linux/signal.h> #include <asm/ptrace.h> + +extern void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long); +extern void *restart_stack; + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + void do_protection_exception(struct pt_regs *, long, unsigned long); void do_dat_exception(struct pt_regs *, long, unsigned long); void do_asce_exception(struct pt_regs *, long, unsigned long); -extern int sysctl_userprocess_debug; - void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +void do_restart(void); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void die(const char * str, struct pt_regs * regs, long err); +void __init time_init(void); + struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 5f729d627ce..83a93747e2f 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -43,19 +43,18 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -249,9 +248,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+7(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -260,14 +260,14 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK sysc_do_svc: - llgh %r7,SP_SVCNR(%r15) + llgh %r7,SP_SVC_CODE+2(%r15) slag %r7,%r7,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) slag %r7,%r1,2 # shift and test for svc 0 sysc_nr_ok: larl %r10,sys_call_table @@ -277,7 +277,7 @@ sysc_nr_ok: larl %r10,sys_call_table_emu # use 31 bit emulation system calls sysc_noemu: #endif - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(8,%r15),SP_R7(%r15) lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys @@ -287,23 +287,19 @@ sysc_noemu: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+7(%r12),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) + ni __TI_flags+7(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore - -# # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED @@ -312,8 +308,6 @@ sysc_work_tif: jo sysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart tm __TI_flags+7(%r12),_TIF_PER_TRAP jo sysc_singlestep j sysc_return # beware of critical section cleanup @@ -339,11 +333,15 @@ sysc_sigpending: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep - j sysc_return + tm __TI_flags+7(%r12),_TIF_SYSCALL + jno sysc_return + lmg %r2,%r6,SP_R2(%r15) # load svc arguments + lghi %r7,0 # svc 0 returns -ENOSYS + lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r7,%r1,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -354,23 +352,10 @@ sysc_notify_resume: jg do_notify_resume # call do_notify_resume # -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 - j sysc_nr_ok # restart svc - -# # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + ni __TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return jg do_per_trap @@ -382,7 +367,7 @@ sysc_singlestep: sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 - llgh %r0,SP_SVCNR(%r15) + llgh %r0,SP_SVC_CODE+2(%r15) stg %r0,SP_R2(%r15) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls @@ -397,7 +382,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 jz sysc_return la %r2,SP_PTREGS(%r15) # load pt_regs larl %r14,sysc_return # return point is sysc_return @@ -470,7 +455,6 @@ ENTRY(pgm_check_handler) jnz pgm_per # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT @@ -550,9 +534,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+7(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -561,7 +546,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc @@ -571,7 +555,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_per_trap j pgm_exit @@ -869,12 +852,12 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - stg %r15,__LC_SAVE_AREA_64(%r0) # save r15 + stg %r15,__LC_SAVE_AREA+120(%r0) # save r15 larl %r15,restart_stack # load restart stack lg %r15,0(%r15) aghi %r15,-SP_SIZE # make room for pt_regs stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(8,%r15),__LC_SAVE_AREA+120(%r0)# store saved %r15 to stack mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 brasl %r14,do_restart @@ -972,9 +955,11 @@ cleanup_system_call: stg %r15,32(%r12) stg %r11,0(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC mvc 8(8,%r12),__LC_THREAD_INFO + lg %r12,__LC_THREAD_INFO + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC + oi __TI_flags+7(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) jhe cleanup_stime @@ -1076,6 +1061,11 @@ sie_loop: lg %r14,__LC_THREAD_INFO # pointer thread_info struct tm __TI_flags+7(%r14),_TIF_EXIT_SIE jnz sie_exit + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz sie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +sie_gmap: lg %r14,__SF_EMPTY(%r15) # get control block pointer SPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) @@ -1083,6 +1073,7 @@ sie_done: SPP __LC_CMF_HPP # set host id lg %r14,__LC_THREAD_INFO # pointer thread_info struct sie_exit: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 @@ -1090,6 +1081,7 @@ sie_exit: lghi %r2,0 br %r14 sie_fault: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lg %r14,__LC_THREAD_INFO # pointer thread_info struct ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 2d781bab37b..900068d2bf9 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -449,10 +449,28 @@ ENTRY(start) # .org 0x10000 ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 + xc 0xe00(256),0xe00 stck __LC_LAST_UPDATE_CLOCK spt 5f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) @@ -534,6 +552,8 @@ ENTRY(startup) .align 8 5: .long 0x7fffffff,0xffffffff +#include "head_kdump.S" + # # params at 10400 (setup.h) # @@ -541,6 +561,8 @@ ENTRY(startup) .long 0,0 # IPL_DEVICE .long 0,0 # INITRD_START .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index f21954b44dc..d3f1ab7d90a 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -92,7 +92,7 @@ ENTRY(_stext) .LPG3: # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index ae5d492b069..99348c0eaa4 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -90,7 +90,7 @@ ENTRY(_stext) .LPG3: # check control registers stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal + oi 6(%r15),0x60 # enable sigp emergency & external call oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 00000000000..e1ac3893e97 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,119 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: + mvc 0(8,%r0),.Lrestart_psw-0b(%r13) # Setup restart PSW + mvc 464(16,%r0),.Lpgm_psw-0b(%r13) # Setup pgm check PSW + lhi %r1,1 # Start new kernel + diag %r1,%r1,0x308 # with diag 308 + +.Lno_diag308: # No diag 308 + sam31 # Switch to 31 bit addr mode + sr %r1,%r1 # Erase register r1 + sr %r2,%r2 # Erase register r2 + sigp %r1,%r2,0x12 # Switch to 31 bit arch mode + lpsw 0 # Start new kernel... +.align 8 +.Lrestart_psw: + .long 0x00080000,0x80000000 + startup +.Lpgm_psw: + .quad 0x0000000180000000,0x0000000000000000 + .Lno_diag308 +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 48c71020636..affa8e68124 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -16,6 +16,7 @@ #include <linux/ctype.h> #include <linux/fs.h> #include <linux/gfp.h> +#include <linux/crash_dump.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> @@ -26,6 +27,7 @@ #include <asm/sclp.h> #include <asm/sigp.h> #include <asm/checksum.h> +#include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -275,8 +277,8 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); /* VM IPL PARM routines */ -size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { int i; size_t len; @@ -338,8 +340,8 @@ static size_t scpdata_length(const char* buf, size_t count) return count; } -size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { size_t count; size_t i; @@ -1738,7 +1740,11 @@ static struct kobj_attribute on_restart_attr = void do_restart(void) { + smp_restart_with_online_cpu(); smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif on_restart_trigger.action->fn(&on_restart_trigger); stop_run(&on_restart_trigger); } @@ -2009,7 +2015,7 @@ static void do_reset_calls(void) u32 dump_prefix_page; -void s390_reset_system(void) +void s390_reset_system(void (*func)(void *), void *data) { struct _lowcore *lc; @@ -2028,15 +2034,19 @@ void s390_reset_system(void) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + /* Store status at absolute zero */ + store_status(); + do_reset_calls(); + if (func) + func(data); } - diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1f4050d45f7..b9a7fdd9c81 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -33,7 +33,8 @@ static const struct irq_class intrclass_names[] = { {.name = "EXT" }, {.name = "I/O" }, {.name = "CLK", .desc = "[EXT] Clock Comparator" }, - {.name = "IPI", .desc = "[EXT] Signal Processor" }, + {.name = "EXC", .desc = "[EXT] External Call" }, + {.name = "EMS", .desc = "[EXT] Emergency Signal" }, {.name = "TMR", .desc = "[EXT] CPU Timer" }, {.name = "TAL", .desc = "[EXT] Timing Alert" }, {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, @@ -42,8 +43,8 @@ static const struct irq_class intrclass_names[] = { {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, {.name = "CPM", .desc = "[EXT] CPU Measurement" }, + {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, - {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, {.name = "C15", .desc = "[I/O] 3215" }, {.name = "C70", .desc = "[I/O] 3270" }, @@ -53,6 +54,7 @@ static const struct irq_class intrclass_names[] = { {.name = "CLW", .desc = "[I/O] CLAW" }, {.name = "CTC", .desc = "[I/O] CTC" }, {.name = "APB", .desc = "[I/O] AP Bus" }, + {.name = "CSC", .desc = "[I/O] CHSC Subchannel" }, {.name = "NMI", .desc = "[NMI] Machine Check" }, }; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1d05d669107..64b761aef00 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -635,7 +635,7 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -void __kprobes jprobe_return_end(void) +static void __used __kprobes jprobe_return_end(void) { asm volatile("bcr 0,0"); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b09b9c62573..3cd0f25ab01 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,10 +1,11 @@ /* * arch/s390/kernel/machine_kexec.c * - * Copyright IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005,2011 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ #include <linux/device.h> @@ -21,12 +22,162 @@ #include <asm/smp.h> #include <asm/reset.h> #include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/asm-offsets.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; +#ifdef CONFIG_CRASH_DUMP + +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Store status of next available physical CPU + */ +static int store_status_next(int start_cpu, int this_cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + int cpu, rc; + + for (cpu = start_cpu; cpu < 65536; cpu++) { + if (cpu == this_cpu) + continue; + do { + rc = raw_sigp(cpu, sigp_stop_and_store_status); + } while (rc == sigp_busy); + if (rc != sigp_order_code_accepted) + continue; + if (sa->pref_reg) + return cpu; + } + return -1; +} + +/* + * Initialize CPU ELF notes + */ +void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu, phys_cpu = 0, first = 1; + + this_cpu = stap(); + + if (!S390_lowcore.prefixreg_save_area) + first = 0; + for_each_online_cpu(cpu) { + if (first) { + add_elf_notes(cpu); + first = 0; + continue; + } + phys_cpu = store_status_next(phys_cpu, this_cpu); + if (phys_cpu == -1) + break; + add_elf_notes(cpu); + phys_cpu++; + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +#endif + +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + setup_regs(); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else + vmem_remove_mapping(crashk_res.start, size); +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; @@ -35,6 +186,9 @@ int machine_kexec_prepare(struct kimage *image) if (ipl_flags & IPL_NSS_VALID) return -ENOSYS; + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); + /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; @@ -51,27 +205,53 @@ void machine_kexec_cleanup(struct kimage *image) { } +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + void machine_shutdown(void) { } -static void __machine_kexec(void *data) +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) { relocate_kernel_t data_mover; struct kimage *image = data; - pfault_fini(); - s390_reset_system(); - data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); - for (;;); } +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + pfault_fini(); + if (image->type == KEXEC_TYPE_CRASH) + s390_reset_system(__do_machine_kdump, data); + else + s390_reset_system(__do_machine_kexec, data); + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ void machine_kexec(struct kimage *image) { + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; tracer_disable(); smp_send_stop(); smp_switch_to_ipl_cpu(__machine_kexec, image); diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 0fbe4e32f7b..19b4568f4ce 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -62,3 +62,72 @@ void detect_memory_layout(struct mem_chunk chunk[]) arch_local_irq_restore(flags); } EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address, size, and type + */ +void create_mem_hole(struct mem_chunk chunks[], unsigned long addr, + unsigned long size, int type) +{ + unsigned long start, end, new_size; + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunks[i].size == 0) + continue; + if (addr + size < chunks[i].addr) + continue; + if (addr >= chunks[i].addr + chunks[i].size) + continue; + start = max(addr, chunks[i].addr); + end = min(addr + size, chunks[i].addr + chunks[i].size); + new_size = end - start; + if (new_size == 0) + continue; + if (start == chunks[i].addr && + end == chunks[i].addr + chunks[i].size) { + /* Remove chunk */ + chunks[i].type = type; + } else if (start == chunks[i].addr) { + /* Make chunk smaller at start */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = chunks[i].addr + new_size; + chunks[i + 1].size = chunks[i].size - new_size; + chunks[i].size = new_size; + chunks[i].type = type; + i += 1; + } else if (end == chunks[i].addr + chunks[i].size) { + /* Make chunk smaller at end */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = start; + chunks[i + 1].size = new_size; + chunks[i + 1].type = type; + chunks[i].size -= new_size; + i += 1; + } else { + /* Create memory hole */ + if (i >= MEMORY_CHUNKS - 2) + panic("Unable to create memory hole"); + memmove(&chunks[i + 2], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 2))); + chunks[i + 1].addr = addr; + chunks[i + 1].size = size; + chunks[i + 1].type = type; + chunks[i + 2].addr = addr + size; + chunks[i + 2].size = + chunks[i].addr + chunks[i].size - (addr + size); + chunks[i + 2].type = chunks[i].type; + chunks[i].size = addr - chunks[i].addr; + i += 2; + } + } +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 541a7509fae..9451b210a1b 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/elfcore.h> #include <linux/smp.h> #include <linux/slab.h> #include <linux/interrupt.h> @@ -117,7 +118,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; regs.gprs[9] = (unsigned long) fn; regs.gprs[10] = (unsigned long) arg; diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 311e9d71288..6e0073e43f5 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -74,7 +74,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ef86ad24398..450931a45b6 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,34 +42,37 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; void update_per_regs(struct task_struct *task) { - static const struct per_regs per_single_step = { - .control = PER_EVENT_IFETCH, - .start = 0, - .end = PSW_ADDR_INSN, - }; struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - const struct per_regs *new; - struct per_regs old; - - /* TIF_SINGLE_STEP overrides the user specified PER registers. */ - new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ? - &per_single_step : &thread->per_user; + struct per_regs old, new; + + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + new.control |= PER_EVENT_IFETCH; + new.start = 0; + new.end = PSW_ADDR_INSN; + } /* Take care of the PER enablement bit in the PSW. */ - if (!(new->control & PER_EVENT_MASK)) { + if (!(new.control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; __ctl_store(old, 9, 11); - if (memcmp(new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(*new, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); } void user_enable_single_step(struct task_struct *task) @@ -166,8 +169,8 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + /* Return a clean psw mask. */ + tmp = psw_user_bits | (tmp & PSW_MASK_USER); } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -289,18 +292,17 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(psw_user32_bits, data) && -#endif - data != PSW_MASK_MERGE(psw_user_bits, data)) + ((data & ~PSW_MASK_USER) != psw_user_bits || + ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; -#ifndef CONFIG_64BIT if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; + *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -495,21 +497,21 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) __u32 tmp; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -594,24 +596,32 @@ static int __poke_user_compat(struct task_struct *child, addr_t offset; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) + if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & PSW32_MASK_USER) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -735,7 +745,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svcnr = 0; + clear_thread_flag(TIF_SYSCALL); ret = -1; } @@ -897,6 +907,26 @@ static int s390_last_break_get(struct task_struct *target, #endif +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -923,6 +953,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1102,6 +1140,14 @@ static const struct user_regset s390_compat_regsets[] = { .align = sizeof(long), .get = s390_compat_last_break_get, }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 303d961c3bb..ad67c214be0 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -10,6 +10,12 @@ #include <asm/asm-offsets.h> # +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 + +# # do_reipl_asm # Parameter: r2 = schid of reipl device # diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index e690975403f..732a793ec53 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -17,11 +17,11 @@ # ENTRY(store_status) /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_64(%r0) + stg %r1,__LC_SAVE_AREA+120(%r0) lghi %r1,SAVE_AREA_BASE /* General purpose registers */ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_64(%r0) + lg %r2,__LC_SAVE_AREA+120(%r0) stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) /* Control registers */ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) @@ -62,8 +62,11 @@ ENTRY(store_status) larl %r2,store_status stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) br %r14 -.align 8 + + .section .bss + .align 8 .Lclkcmp: .quad 0x0000000000000000 + .previous # # do_reipl_asm diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7b371c37061..8ac6bfa2786 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -42,6 +42,9 @@ #include <linux/reboot.h> #include <linux/topology.h> #include <linux/ftrace.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> #include <asm/ipl.h> #include <asm/uaccess.h> @@ -57,12 +60,13 @@ #include <asm/ebcdic.h> #include <asm/compat.h> #include <asm/kvm_virtio.h> +#include <asm/diag.h> -long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY); -long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); +long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | + PSW_MASK_EA | PSW_MASK_BA; +long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_ASC_HOME; /* * User copy operations. @@ -274,22 +278,14 @@ early_param("mem", early_parse_mem); unsigned int user_mode = HOME_SPACE_MODE; EXPORT_SYMBOL_GPL(user_mode); -static int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int set_amode_primary(void) { - psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; + psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; + psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; #ifdef CONFIG_COMPAT - psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; - psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE; + psw32_user_bits = + (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; #endif - psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY; if (MACHINE_HAS_MVCOS) { memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); @@ -325,7 +321,7 @@ early_param("user_mode", early_parse_user_mode); static void setup_addressing_mode(void) { if (user_mode == PRIMARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) + if (set_amode_primary()) pr_info("Address spaces switched, " "mvcos available\n"); else @@ -344,24 +340,25 @@ setup_lowcore(void) */ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = psw_kernel_bits; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lc->restart_psw.mask |= PSW_ASC_HOME; - lc->external_new_psw.mask = psw_kernel_bits; + lc->external_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits; + lc->program_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = psw_kernel_bits; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits; + lc->io_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; @@ -435,10 +432,14 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_OLDMEM || + memory_chunk[i].type == CHUNK_CRASHK) + continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: + case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -479,6 +480,7 @@ static void __init setup_memory_end(void) unsigned long max_mem; int i; + #ifdef CONFIG_ZFCPDUMP if (ipl_info.type == IPL_TYPE_FCP_DUMP) { memory_end = ZFCPDUMP_HSA_SIZE; @@ -545,11 +547,201 @@ static void __init setup_restart_psw(void) * Setup restart PSW for absolute zero lowcore. This is necesary * if PSW restart is done on an offline CPU that has lowcore zero */ - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } +static void __init setup_vmcoreinfo(void) +{ +#ifdef CONFIG_KEXEC + unsigned long ptr = paddr_vmcoreinfo_note(); + + copy_to_absolute_zero(&S390_lowcore.vmcore_info, &ptr, sizeof(ptr)); +#endif +} + +#ifdef CONFIG_CRASH_DUMP + +/* + * Find suitable location for crashkernel memory + */ +static unsigned long __init find_crash_base(unsigned long crash_size, + char **msg) +{ + unsigned long crash_base; + struct mem_chunk *chunk; + int i; + + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return 0; + } + if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) + return OLDMEM_BASE; + + for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (chunk->type != CHUNK_READ_WRITE) + continue; + if (chunk->size < crash_size) + continue; + crash_base = (chunk->addr + chunk->size) - crash_size; + if (crash_base < crash_size) + continue; + if (crash_base < ZFCPDUMP_HSA_SIZE_MAX) + continue; + if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) + continue; + return crash_base; + } + *msg = "no suitable area found"; + return 0; +} + +/* + * Check if crash_base and crash_size is valid + */ +static int __init verify_crash_base(unsigned long crash_base, + unsigned long crash_size, + char **msg) +{ + struct mem_chunk *chunk; + int i; + + /* + * Because we do the swap to zero, we must have at least 'crash_size' + * bytes free space before crash_base + */ + if (crash_size > crash_base) { + *msg = "crashkernel offset must be greater than size"; + return -EINVAL; + } + + /* First memory chunk must be at least crash_size */ + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return -EINVAL; + } + /* Check if we fit into the respective memory chunk */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (crash_base < chunk->addr) + continue; + if (crash_base >= chunk->addr + chunk->size) + continue; + /* we have found the memory chunk */ + if (crash_base + crash_size > chunk->addr + chunk->size) { + *msg = "selected memory chunk is too small for " + "crashkernel memory"; + return -EINVAL; + } + return 0; + } + *msg = "invalid memory range specified"; + return -EINVAL; +} + +/* + * Reserve kdump memory by creating a memory hole in the mem_chunk array + */ +static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, + int type) +{ + + create_mem_hole(memory_chunk, addr, size, type); +} + +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} + +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (!OLDMEM_BASE) + return; + + reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); + reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, + CHUNK_OLDMEM); + if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; + else + saved_max_pfn = PFN_DOWN(real_memory_size) - 1; +#endif +} + +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + char *msg; + int rc; + + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); + if (rc || crash_size == 0) + return; + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (register_memory_notifier(&kdump_mem_nb)) + return; + if (!crash_base) + crash_base = find_crash_base(crash_size, &msg); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (verify_crash_base(crash_base, crash_size, &msg)) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, memory_end >> 20); +#endif +} + static void __init setup_memory(void) { @@ -580,6 +772,14 @@ setup_memory(void) if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + /* Move initrd behind kdump oldmem */ + if (start + INITRD_SIZE > OLDMEM_BASE && + start < OLDMEM_BASE + OLDMEM_SIZE) + start = OLDMEM_BASE + OLDMEM_SIZE; + } +#endif if (start + INITRD_SIZE > memory_end) { pr_err("initrd extends beyond end of " "memory (0x%08lx > 0x%08lx) " @@ -610,7 +810,8 @@ setup_memory(void) for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE) + if (memory_chunk[i].type != CHUNK_READ_WRITE && + memory_chunk[i].type != CHUNK_CRASHK) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); @@ -644,6 +845,15 @@ setup_memory(void) reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, BOOTMEM_DEFAULT); +#ifdef CONFIG_CRASH_DUMP + if (crashk_res.start) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1, + BOOTMEM_DEFAULT); + if (is_kdump_kernel()) + reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); +#endif #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { if (INITRD_START + INITRD_SIZE <= memory_end) { @@ -812,8 +1022,11 @@ setup_arch(char **cmdline_p) setup_ipl(); setup_memory_end(); setup_addressing_mode(); + reserve_oldmem(); + reserve_crashkernel(); setup_memory(); setup_resources(); + setup_vmcoreinfo(); setup_restart_psw(); setup_lowcore(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9a40e1cc5ec..05a85bc14c9 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,6 +30,7 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/compat.h> #include "entry.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -116,7 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); + user_sregs.regs.psw.mask = psw_user_bits | + (regs->psw.mask & PSW_MASK_USER); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, @@ -143,9 +145,13 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - user_sregs.regs.psw.mask); - regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; + /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (user_sregs.regs.psw.mask & PSW_MASK_USER); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(sregs->regs.acrs)); @@ -156,7 +162,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -288,6 +294,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -356,6 +363,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -401,7 +409,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; @@ -421,54 +428,45 @@ void do_signal(struct pt_regs *regs) else oldset = ¤t->blocked; - /* Are we from a system call? */ - if (regs->svcnr) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; - - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; - break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; - } - regs->svcnr = 0; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current_thread_info()->system_call = + test_thread_flag(TIF_SYSCALL) ? regs->svc_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } - if (signr > 0) { /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->svc_code >> 16); + break; + } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); + } + + if ((is_compat_task() ? + handle_signal32(signr, &ka, &info, oldset, regs) : + handle_signal(signr, &ka, &info, oldset, regs)) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -482,11 +480,32 @@ void do_signal(struct pt_regs *regs) * Let tracing know that we've done the handler setup. */ tracehook_signal_handler(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); + test_thread_flag(TIF_SINGLE_STEP)); } return; } + /* No handlers present - check for system call restart */ + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->svc_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_thread_flag(TIF_SYSCALL); + break; + default: + clear_thread_flag(TIF_SYSCALL); + break; + } + } + /* * If there's no signal to deliver, we just put the saved sigmask back. */ @@ -494,13 +513,6 @@ void do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } - - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } } void do_notify_resume(struct pt_regs *regs) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6ab16ac64d2..3ea872890da 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -38,6 +38,7 @@ #include <linux/timex.h> #include <linux/bootmem.h> #include <linux/slab.h> +#include <linux/crash_dump.h> #include <asm/asm-offsets.h> #include <asm/ipl.h> #include <asm/setup.h> @@ -97,6 +98,29 @@ static inline int cpu_stopped(int cpu) return raw_cpu_stopped(cpu_logical_map(cpu)); } +/* + * Ensure that PSW restart is done on an online CPU + */ +void smp_restart_with_online_cpu(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (stap() == __cpu_logical_map[cpu]) { + /* We are online: Enable DAT again and return */ + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + return; + } + } + /* We are not online: Do PSW restart on an online CPU */ + while (sigp(cpu, sigp_restart) == sigp_busy) + cpu_relax(); + /* And stop ourself */ + while (raw_sigp(stap(), sigp_stop) == sigp_busy) + cpu_relax(); + for (;;); +} + void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) { struct _lowcore *lc, *current_lc; @@ -106,14 +130,16 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) if (smp_processor_id() == 0) func(data); - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | + PSW_MASK_EA | PSW_MASK_BA); /* Disable lowcore protection */ __ctl_clear_bit(0, 28); current_lc = lowcore_ptr[smp_processor_id()]; lc = lowcore_ptr[0]; if (!lc) lc = current_lc; - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; if (!cpu_online(0)) smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); @@ -135,7 +161,7 @@ void smp_send_stop(void) int cpu, rc; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); /* stop all processors */ @@ -161,7 +187,10 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, { unsigned long bits; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; + if (ext_int_code == 0x1202) + kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; + else + kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; /* * handle bit signal external calls */ @@ -183,12 +212,19 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, */ static void smp_ext_bitcall(int cpu, int sig) { + int order; + /* * Set signaling bit in lowcore of target cpu and kick it */ set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy) + while (1) { + order = smp_vcpu_scheduled(cpu) ? + sigp_external_call : sigp_emergency_signal; + if (sigp(cpu, order) != sigp_busy) + break; udelay(10); + } } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -281,11 +317,13 @@ void smp_ctl_clear_bit(int cr, int bit) } EXPORT_SYMBOL(smp_ctl_clear_bit); -#ifdef CONFIG_ZFCPDUMP +#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) + return; + if (is_kdump_kernel()) return; if (cpu >= NR_CPUS) { pr_warning("CPU %i exceeds the maximum %i and is excluded from " @@ -403,6 +441,18 @@ static void __init smp_detect_cpus(void) info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE && !is_kdump_kernel()) { + struct save_area *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + panic("could not allocate memory for save area\n"); + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + 0x200, 0); + zfcpdump_save_areas[0] = save_area; + } +#endif /* Use sigp detection algorithm if sclp doesn't work. */ if (sclp_get_cpu_info(info)) { smp_use_sigp_detection = 1; @@ -463,7 +513,8 @@ int __cpuinit start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); __ctl_clear_bit(0, 28); /* Disable lowcore protection */ - S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + S390_lowcore.restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; S390_lowcore.restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; __ctl_set_bit(0, 28); /* Enable lowcore protection */ @@ -511,7 +562,8 @@ static int __cpuinit smp_alloc_lowcore(int cpu) memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); lowcore->async_stack = async_stack + ASYNC_SIZE; lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lowcore->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lowcore->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; if (user_mode != HOME_SPACE_MODE) @@ -712,6 +764,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + panic("Couldn't request external interrupt 0x1202"); /* Reallocate current lowcore, but keep its contents. */ lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index cf9e5c6d552..47df775c844 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,6 +7,8 @@ */ #include <linux/pfn.h> +#include <linux/suspend.h> +#include <linux/mm.h> #include <asm/system.h> /* @@ -14,6 +16,123 @@ */ extern const void __nosave_begin, __nosave_end; +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. + * The size of the keys array is (PAGE_SIZE - sizeof(long)) + */ +struct page_key_data { + struct page_key_data *next; + unsigned char data[]; +}; + +#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) + +static struct page_key_data *page_key_data; +static struct page_key_data *page_key_rp, *page_key_wp; +static unsigned long page_key_rx, page_key_wx; + +/* + * For each page in the hibernation image one additional byte is + * stored in the most significant byte of the page frame number. + * On suspend no additional memory is required but on resume the + * keys need to be memorized until the page data has been restored. + * Only then can the storage keys be set to their old state. + */ +unsigned long page_key_additional_pages(unsigned long pages) +{ + return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); +} + +/* + * Free page_key_data list of arrays. + */ +void page_key_free(void) +{ + struct page_key_data *pkd; + + while (page_key_data) { + pkd = page_key_data; + page_key_data = pkd->next; + free_page((unsigned long) pkd); + } +} + +/* + * Allocate page_key_data list of arrays with enough room to store + * one byte for each page in the hibernation image. + */ +int page_key_alloc(unsigned long pages) +{ + struct page_key_data *pk; + unsigned long size; + + size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); + while (size--) { + pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); + if (!pk) { + page_key_free(); + return -ENOMEM; + } + pk->next = page_key_data; + page_key_data = pk; + } + page_key_rp = page_key_wp = page_key_data; + page_key_rx = page_key_wx = 0; + return 0; +} + +/* + * Save the storage key into the upper 8 bits of the page frame number. + */ +void page_key_read(unsigned long *pfn) +{ + unsigned long addr; + + addr = (unsigned long) page_address(pfn_to_page(*pfn)); + *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); +} + +/* + * Extract the storage key from the upper 8 bits of the page frame number + * and store it in the page_key_data list of arrays. + */ +void page_key_memorize(unsigned long *pfn) +{ + page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; + *(unsigned char *) pfn = 0; + if (++page_key_wx < PAGE_KEY_DATA_SIZE) + return; + page_key_wp = page_key_wp->next; + page_key_wx = 0; +} + +/* + * Get the next key from the page_key_data list of arrays and set the + * storage key of the page referred by @address. If @address refers to + * a "safe" page the swsusp_arch_resume code will transfer the storage + * key from the buffer page to the original page. + */ +void page_key_write(void *address) +{ + page_set_storage_key((unsigned long) address, + page_key_rp->data[page_key_rx], 0); + if (++page_key_rx >= PAGE_KEY_DATA_SIZE) + return; + page_key_rp = page_key_rp->next; + page_key_rx = 0; +} + int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 51bcdb50a23..acb78cdee89 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -136,11 +136,14 @@ ENTRY(swsusp_arch_resume) 0: lg %r2,8(%r1) lg %r4,0(%r1) + iske %r0,%r4 lghi %r3,PAGE_SIZE lghi %r5,PAGE_SIZE 1: mvcle %r2,%r4,0 jo 1b + lg %r2,8(%r1) + sske %r0,%r2 lg %r1,16(%r1) ltgr %r1,%r1 jnz 0b diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 5c9e439bf3f..2a94b774695 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -442,7 +442,7 @@ void s390_adjust_jiffies(void) */ FP_UNPACK_SP(SA, &fmil); if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, info->capability, 32, int); + FP_FROM_INT_S(SB, (long) info->capability, 64, long); else FP_UNPACK_SP(SB, &info->capability); FP_DIV_S(SR, SA, SB); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index dff933065ab..ebbfab3c6e5 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -48,6 +48,7 @@ #include <asm/timer.h> #include <asm/etr.h> #include <asm/cio.h> +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) @@ -109,10 +110,14 @@ static void fixup_clock_comparator(unsigned long long delta) set_clock_comparator(S390_lowcore.clock_comparator); } -static int s390_next_event(unsigned long delta, +static int s390_next_ktime(ktime_t expires, struct clock_event_device *evt) { - S390_lowcore.clock_comparator = get_clock() + delta; + u64 nsecs; + + nsecs = ktime_to_ns(ktime_sub(expires, ktime_get_monotonic_offset())); + do_div(nsecs, 125); + S390_lowcore.clock_comparator = TOD_UNIX_EPOCH + (nsecs << 9); set_clock_comparator(S390_lowcore.clock_comparator); return 0; } @@ -137,14 +142,15 @@ void init_cpu_timer(void) cpu = smp_processor_id(); cd = &per_cpu(comparators, cpu); cd->name = "comparator"; - cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_KTIME; cd->mult = 16777; cd->shift = 12; cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; cd->cpumask = cpumask_of(cpu); - cd->set_next_event = s390_next_event; + cd->set_next_ktime = s390_next_ktime; cd->set_mode = s390_set_mode; clockevents_register_device(cd); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 0cd340b7263..77b8942b9a1 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -299,8 +299,8 @@ out: } __initcall(init_topology_update); -static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, - int offset) +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) { int i, nr_masks; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ffabcd9d336..a9807dd8627 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -200,7 +200,7 @@ void show_registers(struct pt_regs *regs) mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); #ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS)); + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); #endif printk("\n%s GPRS: " FOURLONG, mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); @@ -334,7 +334,8 @@ void __kprobes do_per_trap(struct pt_regs *regs) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_HWBKPT; - info.si_addr = (void *) current->thread.per_event.address; + info.si_addr = + (void __force __user *) current->thread.per_event.address; force_sig_info(SIGTRAP, &info, current); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 2d6228f60cd..bb48977f546 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -170,7 +170,8 @@ void __kprobes vtime_stop_cpu(void) psw_t psw; /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; idle->nohz_delay = 0; @@ -183,7 +184,8 @@ void __kprobes vtime_stop_cpu(void) * set_cpu_timer(VTIMER_MAX_SLICE); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. @@ -216,7 +218,8 @@ void __kprobes vtime_stop_cpu(void) * vq->idle = get_cpu_timer(); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. @@ -458,7 +461,7 @@ void add_virt_timer_periodic(void *new) } EXPORT_SYMBOL(add_virt_timer_periodic); -int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) +static int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) { struct vtimer_queue *vq; unsigned long flags; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9e4c84187cf..87cedd61be0 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,7 +1,7 @@ /* * diag.c - handling diagnose instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2011 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -15,6 +15,34 @@ #include <linux/kvm_host.h> #include "kvm-s390.h" +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = vcpu->arch.sie_block->prefix; + + start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); @@ -57,6 +85,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; switch (code) { + case 0x10: + return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); case 0x308: diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c9aeb4b4d0b..87c16705b38 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) + return 1; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; @@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: case KVM_S390_INT_VIRTIO: @@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, exception = 1; break; + case KVM_S390_INT_EXTERNAL_CALL: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); @@ -522,6 +550,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, break; case KVM_S390_PROGRAM_INT: case KVM_S390_SIGP_STOP: + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: default: kfree(inti); @@ -581,6 +610,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, break; case KVM_S390_SIGP_STOP: case KVM_S390_RESTART: + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); inti->type = s390int->type; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f17296e4fc8..0bd3bea1e4c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_external_call", VCPU_STAT(deliver_external_call) }, { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, @@ -64,11 +65,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { NULL } }; @@ -123,6 +126,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: + case KVM_CAP_S390_GMAP: r = 1; break; default: @@ -174,6 +178,8 @@ int kvm_arch_init_vm(struct kvm *kvm) if (rc) goto out_err; + rc = -ENOMEM; + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; @@ -263,10 +269,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); + gmap_enable(vcpu->arch.gmap); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->arch.guest_acrs); restore_fp_regs(&vcpu->arch.host_fpregs); @@ -309,11 +317,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); - int rc = -ENOMEM; + struct kvm_vcpu *vcpu; + int rc = -EINVAL; + + if (id >= KVM_MAX_VCPUS) + goto out; + rc = -ENOMEM; + + vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); if (!vcpu) - goto out_nomem; + goto out; vcpu->arch.sie_block = (struct kvm_s390_sie_block *) get_zeroed_page(GFP_KERNEL); @@ -349,7 +363,7 @@ out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: kfree(vcpu); -out_nomem: +out: return ERR_PTR(rc); } @@ -383,6 +397,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + restore_access_regs(vcpu->arch.guest_acrs); return 0; } @@ -398,6 +413,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); vcpu->arch.guest_fpregs.fpc = fpu->fpc; + restore_fp_regs(&vcpu->arch.guest_fpregs); return 0; } @@ -461,7 +477,6 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_disable(); kvm_guest_enter(); local_irq_enable(); - gmap_enable(vcpu->arch.gmap); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { @@ -470,7 +485,6 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); - gmap_disable(vcpu->arch.gmap); local_irq_disable(); kvm_guest_exit(); local_irq_enable(); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index d6a50c1fb2e..f815118835f 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -87,6 +87,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) return -ENOMEM; inti->type = KVM_S390_INT_EMERGENCY; + inti->emerg.code = vcpu->vcpu_id; spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; @@ -103,9 +104,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) wake_up_interruptible(&li->wq); spin_unlock_bh(&li->lock); rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); +unlock: + spin_unlock(&fi->lock); + return rc; +} + +static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EXTERNAL_CALL; + inti->extcall.code = vcpu->vcpu_id; + + spin_lock(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); unlock: spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); return rc; } @@ -267,6 +306,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_sense(vcpu, cpu_addr, &vcpu->arch.guest_gprs[r1]); break; + case SIGP_EXTERNAL_CALL: + vcpu->stat.instruction_sigp_external_call++; + rc = __sigp_external_call(vcpu, cpu_addr); + break; case SIGP_EMERGENCY: vcpu->stat.instruction_sigp_emergency++; rc = __sigp_emergency(vcpu, cpu_addr); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a65229d91c9..db92f044024 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -32,7 +32,8 @@ static void __udelay_disabled(unsigned long long usecs) u64 clock_saved; u64 end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT | + PSW_MASK_EXT | PSW_MASK_MCHECK; end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); __ctl_store(cr0_saved, 0, 0); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 74833831417..342ae35a5ba 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -342,7 +342,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; @@ -378,7 +379,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9564fc779b2..1766def5bc3 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -307,7 +307,7 @@ static inline int do_exception(struct pt_regs *regs, int access, #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { - address = gmap_fault(address, + address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; @@ -393,7 +393,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, int fault; /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr -= (pgm_int_code >> 16); + regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code @@ -454,7 +454,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) struct pt_regs regs; int access, fault; - regs.psw.mask = psw_kernel_bits; + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; if (!irqs_disabled()) regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) __builtin_return_address(0); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 45b405ca256..65cb06e2af4 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -52,7 +52,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page; + struct page *head, *page, *tail; int refs; result = write ? 0 : _SEGMENT_ENTRY_RO; @@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, *nr -= refs; while (refs--) put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 5dbbaa6e594..1cb8427bedf 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/errno.h> +#include <linux/gfp.h> #include <asm/system.h> /* @@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ? -EFAULT : 0; } +/* + * Copy memory in real mode (kernel to kernel) + */ int memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; @@ -101,3 +105,55 @@ void copy_to_absolute_zero(void *dest, void *src, size_t count) __ctl_load(cr0, 0, 0); preempt_enable(); } + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Copy memory from user (virtual) to kernel (real) + */ +int copy_from_user_real(void *dest, void __user *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (copy_from_user(buf, src + offs, size)) + goto out; + if (memcpy_real(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c9a9f7f1818..f09c74881b7 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include <linux/personality.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/module.h> #include <linux/random.h> #include <asm/pgalloc.h> diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d013ed39743..b36537a5f43 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/hugetlb.h> +#include <asm/cacheflush.h> #include <asm/pgtable.h> static void change_page_attr(unsigned long addr, int numpages, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4d1f2bce87b..301c84d3b54 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2007,2009 + * Copyright IBM Corp. 2007,2011 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -160,6 +160,8 @@ struct gmap *gmap_alloc(struct mm_struct *mm) table = (unsigned long *) page_to_phys(page); crst_table_init(table, _REGION1_ENTRY_EMPTY); gmap->table = table; + gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); list_add(&gmap->list, &mm->context.gmap_list); return gmap; @@ -220,6 +222,7 @@ void gmap_free(struct gmap *gmap) /* Free all segment & region tables. */ down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { table = (unsigned long *) page_to_phys(page); if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) @@ -228,6 +231,7 @@ void gmap_free(struct gmap *gmap) gmap_unlink_segment(gmap, table); __free_pages(page, ALLOC_ORDER); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); list_del(&gmap->list); kfree(gmap); @@ -240,10 +244,6 @@ EXPORT_SYMBOL_GPL(gmap_free); */ void gmap_enable(struct gmap *gmap) { - /* Load primary space page table origin. */ - S390_lowcore.user_asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(gmap->table); - asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); S390_lowcore.gmap = (unsigned long) gmap; } EXPORT_SYMBOL_GPL(gmap_enable); @@ -254,14 +254,13 @@ EXPORT_SYMBOL_GPL(gmap_enable); */ void gmap_disable(struct gmap *gmap) { - /* Load primary space page table origin. */ - S390_lowcore.user_asce = - gmap->mm->context.asce_bits | __pa(gmap->mm->pgd); - asm volatile("lctlg 1,1,%0\n" : : "m" (S390_lowcore.user_asce) ); S390_lowcore.gmap = 0UL; } EXPORT_SYMBOL_GPL(gmap_disable); +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long init) { @@ -273,14 +272,12 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - down_read(&gmap->mm->mmap_sem); if (*table & _REGION_ENTRY_INV) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); } else __free_pages(page, ALLOC_ORDER); - up_read(&gmap->mm->mmap_sem); return 0; } @@ -305,19 +302,20 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); if (*table & _REGION_ENTRY_INV) - return 0; + goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 20) & 0x7ff); @@ -325,6 +323,8 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV; } +out: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); @@ -355,6 +355,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -378,19 +379,24 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; out_unmap: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); gmap_unmap_segment(gmap, to, len); return -ENOMEM; } EXPORT_SYMBOL_GPL(gmap_map_segment); -unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) { unsigned long *table, vmaddr, segment; struct mm_struct *mm; @@ -450,16 +456,75 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; rmap->entry = table; + spin_lock(&mm->page_table_lock); list_add(&rmap->list, &mp->mapper); + spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. */ *table = pmd_val(*pmd) & PAGE_MASK; return vmaddr | (address & ~PMD_MASK); } return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; } EXPORT_SYMBOL_GPL(gmap_fault); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) { struct gmap_rmap *rmap, *next; @@ -667,8 +732,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) void __tlb_remove_table(void *_table) { - void *table = (void *)((unsigned long) _table & PAGE_MASK); - unsigned type = (unsigned long) _table & ~PAGE_MASK; + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; if (type) __page_table_free_rcu(table, type); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 781ff516956..4799383e2df 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -335,6 +335,9 @@ void __init vmem_map_init(void) ro_start = ((unsigned long)&_stext) & PAGE_MASK; ro_end = PFN_ALIGN((unsigned long)&_eshared); for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; if (start >= ro_end || end <= ro_start) @@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 4552ce40c81..f43c0e4282a 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -994,7 +994,7 @@ allocate_error: * * Returns 0 on success, !0 on failure. */ -int hwsampler_deallocate() +int hwsampler_deallocate(void) { int rc; @@ -1035,7 +1035,7 @@ unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) return cb->sample_overflow; } -int hwsampler_setup() +int hwsampler_setup(void) { int rc; int cpu; @@ -1102,7 +1102,7 @@ setup_exit: return rc; } -int hwsampler_shutdown() +int hwsampler_shutdown(void) { int rc; @@ -1203,7 +1203,7 @@ start_all_exit: * * Returns 0 on success, !0 on failure. */ -int hwsampler_stop_all() +int hwsampler_stop_all(void) { int tmp_rc, rc, cpu; struct hws_cpu_buffer *cb; |