diff options
Diffstat (limited to 'arch/s390')
103 files changed, 4449 insertions, 2511 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index ae4b01060ed..9858476fa0f 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -1,6 +1,7 @@ -obj-y += kernel/ -obj-y += mm/ -obj-y += crypto/ -obj-y += appldata/ -obj-y += hypfs/ -obj-y += kvm/ +obj-y += kernel/ +obj-y += mm/ +obj-$(CONFIG_KVM) += kvm/ +obj-$(CONFIG_CRYPTO_HW) += crypto/ +obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ +obj-$(CONFIG_APPLDATA_BASE) += appldata/ +obj-$(CONFIG_MATHEMU) += math-emu/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ed5cb5af528..d1727584230 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -87,10 +87,13 @@ config S390 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ - select HAVE_GET_USER_PAGES_FAST select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select HAVE_RCU_TABLE_FREE if SMP + select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -190,18 +193,13 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" - depends on SMP - help - Multi-core scheduler support improves the CPU scheduler's decision - making when dealing with multi-core CPU chips at a cost of slightly - increased overhead in some places. + def_bool n config SCHED_BOOK def_bool y prompt "Book scheduler support" - depends on SMP && SCHED_MC + depends on SMP + select SCHED_MC help Book scheduler support improves the CPU scheduler's decision making when dealing with machines that have several books. @@ -344,9 +342,6 @@ config WARN_DYNAMIC_STACK Say N if you are unsure. -config ARCH_POPULATES_NODE_MAP - def_bool y - comment "Kernel preemption" source "kernel/Kconfig.preempt" @@ -568,6 +563,17 @@ config KEXEC current kernel, and to start another kernel. It is like a reboot but is independent of hardware/microcode support. +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT + select KEXEC + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + For more details see Documentation/kdump/kdump.txt + config ZFCPDUMP def_bool n prompt "zfcpdump support" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 27a0b5df5ea..e9f35334169 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -99,7 +99,6 @@ core-y += arch/s390/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ -drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ # must be linked after kernel drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 92f1cb745d6..4de031d6b76 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data) j = 0; for_each_online_cpu(i) { os_data->os_cpu[j].per_cpu_user = - cputime_to_jiffies(kstat_cpu(i).cpustat.user); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); os_data->os_cpu[j].per_cpu_nice = - cputime_to_jiffies(kstat_cpu(i).cpustat.nice); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); os_data->os_cpu[j].per_cpu_system = - cputime_to_jiffies(kstat_cpu(i).cpustat.system); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); os_data->os_cpu[j].per_cpu_idle = - cputime_to_jiffies(kstat_cpu(i).cpustat.idle); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); os_data->os_cpu[j].per_cpu_irq = - cputime_to_jiffies(kstat_cpu(i).cpustat.irq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); os_data->os_cpu[j].per_cpu_softirq = - cputime_to_jiffies(kstat_cpu(i).cpustat.softirq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); os_data->os_cpu[j].per_cpu_iowait = - cputime_to_jiffies(kstat_cpu(i).cpustat.iowait); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); os_data->os_cpu[j].per_cpu_steal = - cputime_to_jiffies(kstat_cpu(i).cpustat.steal); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); os_data->os_cpu[j].cpu_id = i; j++; } diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 635d677d328..f2737a005af 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -23,4 +23,4 @@ $(obj)/compressed/vmlinux: FORCE install: $(CONFIGURE) $(obj)/image sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/image \ - System.map Kerntypes "$(INSTALL_PATH)" + System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 028f23ea81d..465eca756fe 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -61,7 +61,7 @@ static unsigned long free_mem_end_ptr; extern _sclp_print_early(const char *); -int puts(const char *s) +static int puts(const char *s) { _sclp_print_early(s); return 0; diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 49676771bd6..ffd1ac255f1 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -368,9 +368,12 @@ static inline int crypt_s390_func_available(int func, if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) return 0; - if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76)) + + if (facility_mask & CRYPT_S390_MSA3 && + (!test_facility(2) || !test_facility(76))) return 0; - if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) + if (facility_mask & CRYPT_S390_MSA4 && + (!test_facility(2) || !test_facility(77))) return 0; switch (func & CRYPT_S390_OP_MASK) { diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 48884f89ab9..bd37d09b9d3 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -14,6 +14,7 @@ */ #include <crypto/internal/hash.h> +#include <linux/module.h> #include "sha.h" #include "crypt_s390.h" diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 29c82c640a8..6cf8e26b313 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -68,7 +68,7 @@ CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 6023c6dc1fb..74c8f5e76ce 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -562,10 +562,9 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) void *base; buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr); - base = vmalloc(buf_size); + base = vzalloc(buf_size); if (!base) return -ENOMEM; - memset(base, 0, buf_size); d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); rc = diag204_do_store(d204->buf, diag204_buf_pages); if (rc) { diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6fe874fc5f8..8a2a887478c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -97,7 +97,7 @@ static void hypfs_delete_tree(struct dentry *root) } } -static struct inode *hypfs_make_inode(struct super_block *sb, int mode) +static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) { struct inode *ret = new_inode(sb); @@ -107,10 +107,8 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; - if (mode & S_IFDIR) - ret->i_nlink = 2; - else - ret->i_nlink = 1; + if (S_ISDIR(mode)) + set_nlink(ret, 2); } return ret; } @@ -261,9 +259,9 @@ static int hypfs_parse_options(char *options, struct super_block *sb) return 0; } -static int hypfs_show_options(struct seq_file *s, struct vfsmount *mnt) +static int hypfs_show_options(struct seq_file *s, struct dentry *root) { - struct hypfs_sb_info *hypfs_info = mnt->mnt_sb->s_fs_info; + struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info; seq_printf(s, ",uid=%u", hypfs_info->uid); seq_printf(s, ",gid=%u", hypfs_info->gid); @@ -335,7 +333,7 @@ static void hypfs_kill_super(struct super_block *sb) static struct dentry *hypfs_create_file(struct super_block *sb, struct dentry *parent, const char *name, - char *data, mode_t mode) + char *data, umode_t mode) { struct dentry *dentry; struct inode *inode; @@ -352,16 +350,16 @@ static struct dentry *hypfs_create_file(struct super_block *sb, dentry = ERR_PTR(-ENOMEM); goto fail; } - if (mode & S_IFREG) { + if (S_ISREG(mode)) { inode->i_fop = &hypfs_file_ops; if (data) inode->i_size = strlen(data); else inode->i_size = 0; - } else if (mode & S_IFDIR) { + } else if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - parent->d_inode->i_nlink++; + inc_nlink(parent->d_inode); } else BUG(); inode->i_private = data; diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 623f2fb7177..9381c92cc77 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -11,6 +11,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> #include <asm/fcx.h> +#include <asm/irq.h> /* structs from asm/cio.h */ struct irb; @@ -127,6 +128,7 @@ enum uc_todo { * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure + * @int_class: interruption class to use for accounting interrupts */ struct ccw_driver { struct ccw_device_id *ids; @@ -144,6 +146,7 @@ struct ccw_driver { int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; + enum interruption_class int_class; }; extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index da359ca6fe5..2e49748b27d 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -12,6 +12,7 @@ #define PSW32_MASK_IO 0x02000000UL #define PSW32_MASK_EXT 0x01000000UL #define PSW32_MASK_KEY 0x00F00000UL +#define PSW32_MASK_BASE 0x00080000UL /* Always one */ #define PSW32_MASK_MCHECK 0x00040000UL #define PSW32_MASK_WAIT 0x00020000UL #define PSW32_MASK_PSTATE 0x00010000UL @@ -19,21 +20,19 @@ #define PSW32_MASK_CC 0x00003000UL #define PSW32_MASK_PM 0x00000f00UL -#define PSW32_ADDR_AMODE31 0x80000000UL +#define PSW32_MASK_USER 0x00003F00UL + +#define PSW32_ADDR_AMODE 0x80000000UL #define PSW32_ADDR_INSN 0x7FFFFFFFUL -#define PSW32_BASE_BITS 0x00080000UL +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) #define PSW32_ASC_PRIMARY 0x00000000UL #define PSW32_ASC_ACCREG 0x00004000UL #define PSW32_ASC_SECONDARY 0x00008000UL #define PSW32_ASC_HOME 0x0000C000UL -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - -extern long psw32_user_bits; +extern u32 psw32_user_bits; #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" @@ -131,7 +130,8 @@ struct compat_statfs { compat_fsid_t f_fsid; s32 f_namelen; s32 f_frsize; - s32 f_spare[6]; + s32 f_flags; + s32 f_spare[5]; }; #define COMPAT_RLIM_OLD_INFINITY 0x7fffffff diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 08143487829..c23c3900c30 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -16,114 +16,100 @@ /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long cputime_t; -typedef unsigned long long cputime64_t; +typedef unsigned long long __nocast cputime_t; +typedef unsigned long long __nocast cputime64_t; -#ifndef __s390x__ - -static inline unsigned int -__div(unsigned long long n, unsigned int base) +static inline unsigned long __div(unsigned long long n, unsigned long base) { +#ifndef __s390x__ register_pair rp; rp.pair = n >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1)); return rp.subreg.odd; +#else /* __s390x__ */ + return n / base; +#endif /* __s390x__ */ } -#else /* __s390x__ */ +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime to jiffies and back. + */ +static inline unsigned long cputime_to_jiffies(const cputime_t cputime) +{ + return __div((__force unsigned long long) cputime, 4096000000ULL / HZ); +} -static inline unsigned int -__div(unsigned long long n, unsigned int base) +static inline cputime_t jiffies_to_cputime(const unsigned int jif) { - return n / base; + return (__force cputime_t)(jif * (4096000000ULL / HZ)); } -#endif /* __s390x__ */ +static inline u64 cputime64_to_jiffies64(cputime64_t cputime) +{ + unsigned long long jif = (__force unsigned long long) cputime; + do_div(jif, 4096000000ULL / HZ); + return jif; +} -#define cputime_zero (0ULL) -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_max ((~0UL >> 1) - 1) -#define cputime_add(__a, __b) ((__a) + (__b)) -#define cputime_sub(__a, __b) ((__a) - (__b)) -#define cputime_div(__a, __n) ({ \ - unsigned long long __div = (__a); \ - do_div(__div,__n); \ - __div; \ -}) -#define cputime_halve(__a) ((__a) >> 1) -#define cputime_eq(__a, __b) ((__a) == (__b)) -#define cputime_gt(__a, __b) ((__a) > (__b)) -#define cputime_ge(__a, __b) ((__a) >= (__b)) -#define cputime_lt(__a, __b) ((__a) < (__b)) -#define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) - -#define cputime64_zero (0ULL) -#define cputime64_add(__a, __b) ((__a) + (__b)) -#define cputime_to_cputime64(__ct) (__ct) - -static inline u64 -cputime64_to_jiffies64(cputime64_t cputime) -{ - do_div(cputime, 4096000000ULL / HZ); - return cputime; +static inline cputime64_t jiffies64_to_cputime64(const u64 jif) +{ + return (__force cputime64_t)(jif * (4096000000ULL / HZ)); } /* * Convert cputime to microseconds and back. */ -static inline unsigned int -cputime_to_usecs(const cputime_t cputime) +static inline unsigned int cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096); + return (__force unsigned long long) cputime >> 12; } -static inline cputime_t -usecs_to_cputime(const unsigned int m) +static inline cputime_t usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096; + return (__force cputime_t)(m * 4096ULL); } +#define usecs_to_cputime64(m) usecs_to_cputime(m) + /* * Convert cputime to milliseconds and back. */ -static inline unsigned int -cputime_to_secs(const cputime_t cputime) +static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div(cputime, 2048000000) >> 1; + return __div((__force unsigned long long) cputime, 2048000000) >> 1; } -static inline cputime_t -secs_to_cputime(const unsigned int s) +static inline cputime_t secs_to_cputime(const unsigned int s) { - return (cputime_t) s * 4096000000ULL; + return (__force cputime_t)(s * 4096000000ULL); } /* * Convert cputime to timespec and back. */ -static inline cputime_t -timespec_to_cputime(const struct timespec *value) +static inline cputime_t timespec_to_cputime(const struct timespec *value) { - return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000); } -static inline void -cputime_to_timespec(const cputime_t cputime, struct timespec *value) +static inline void cputime_to_timespec(const cputime_t cputime, + struct timespec *value) { + unsigned long long __cputime = (__force unsigned long long) cputime; #ifndef __s390x__ register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_nsec = rp.subreg.even * 1000 / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } @@ -132,50 +118,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) * Since cputime and timeval have the same resolution (microseconds) * this is easy. */ -static inline cputime_t -timeval_to_cputime(const struct timeval *value) +static inline cputime_t timeval_to_cputime(const struct timeval *value) { - return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_usec * 4096ULL); } -static inline void -cputime_to_timeval(const cputime_t cputime, struct timeval *value) +static inline void cputime_to_timeval(const cputime_t cputime, + struct timeval *value) { + unsigned long long __cputime = (__force unsigned long long) cputime; #ifndef __s390x__ register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = (cputime % 4096000000ULL) / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_usec = (__cputime % 4096000000ULL) / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } /* * Convert cputime to clock and back. */ -static inline clock_t -cputime_to_clock_t(cputime_t cputime) +static inline clock_t cputime_to_clock_t(cputime_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } -static inline cputime_t -clock_t_to_cputime(unsigned long x) +static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (cputime_t) x * (4096000000ULL / USER_HZ); + return (__force cputime_t)(x * (4096000000ULL / USER_HZ)); } /* * Convert cputime64 to clock. */ -static inline clock_t -cputime64_to_clock_t(cputime64_t cputime) +static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } struct s390_idle_data { diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 18124b75a7a..9d88db1f55d 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -73,7 +73,7 @@ typedef struct debug_info { struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; struct debug_view* views[DEBUG_MAX_VIEWS]; char name[DEBUG_MAX_NAME_LEN]; - mode_t mode; + umode_t mode; } debug_info_t; typedef int (debug_header_proc_t) (debug_info_t* id, @@ -124,7 +124,7 @@ debug_info_t *debug_register(const char *name, int pages, int nr_areas, int buf_size); debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, - int buf_size, mode_t mode, uid_t uid, + int buf_size, umode_t mode, uid_t uid, gid_t gid); void debug_unregister(debug_info_t* id); diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 97cc4403fab..6940abfbe1d 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -168,5 +168,6 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); +extern void store_status(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ba7b01c726a..ba6d85f88d5 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -8,7 +8,8 @@ enum interruption_class { EXTERNAL_INTERRUPT, IO_INTERRUPT, EXTINT_CLK, - EXTINT_IPI, + EXTINT_EXC, + EXTINT_EMS, EXTINT_TMR, EXTINT_TLA, EXTINT_PFL, @@ -17,8 +18,8 @@ enum interruption_class { EXTINT_SCP, EXTINT_IUC, EXTINT_CPM, + IOINT_CIO, IOINT_QAI, - IOINT_QDI, IOINT_DAS, IOINT_C15, IOINT_C70, @@ -28,6 +29,7 @@ enum interruption_class { IOINT_CLW, IOINT_CTC, IOINT_APB, + IOINT_CSC, NMI_NMI, NR_IRQS, }; diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h index 40db27cd6e6..5c1abd47612 100644 --- a/arch/s390/include/asm/kdebug.h +++ b/arch/s390/include/asm/kdebug.h @@ -22,6 +22,6 @@ enum die_val { DIE_NMI_IPI, }; -extern void die(const char *, struct pt_regs *, long); +extern void die(struct pt_regs *, const char *); #endif diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index bb729b84a21..cf4e47b0948 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -30,9 +30,15 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 00ff00dfb24..b0c235cb6ad 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -47,7 +47,7 @@ struct sca_block { #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define CPUSTAT_HOST 0x80000000 +#define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 #define CPUSTAT_STOP_INT 0x04000000 @@ -119,6 +119,7 @@ struct kvm_vcpu_stat { u32 instruction_lctlg; u32 exit_program_interruption; u32 exit_instr_and_program; + u32 deliver_external_call; u32 deliver_emergency_signal; u32 deliver_service_signal; u32 deliver_virtio_interrupt; @@ -138,11 +139,14 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 instruction_sigp_sense; + u32 instruction_sigp_sense_running; + u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; u32 instruction_sigp_stop; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 diagnose_10; u32 diagnose_44; }; @@ -174,6 +178,10 @@ struct kvm_s390_prefix_info { __u32 address; }; +struct kvm_s390_extcall_info { + __u16 code; +}; + struct kvm_s390_emerg_info { __u16 code; }; @@ -186,6 +194,7 @@ struct kvm_s390_interrupt_info { struct kvm_s390_ext_info ext; struct kvm_s390_pgm_info pgm; struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; struct kvm_s390_prefix_info prefix; }; }; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index e85c911aabf..707f2306725 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -97,47 +97,52 @@ struct _lowcore { __u32 gpregs_save_area[16]; /* 0x0180 */ __u32 cregs_save_area[16]; /* 0x01c0 */ + /* Save areas. */ + __u32 save_area_sync[8]; /* 0x0200 */ + __u32 save_area_async[8]; /* 0x0220 */ + __u32 save_area_restart[1]; /* 0x0240 */ + __u8 pad_0x0244[0x0248-0x0244]; /* 0x0244 */ + /* Return psws. */ - __u32 save_area[16]; /* 0x0200 */ - psw_t return_psw; /* 0x0240 */ - psw_t return_mcck_psw; /* 0x0248 */ + psw_t return_psw; /* 0x0248 */ + psw_t return_mcck_psw; /* 0x0250 */ /* CPU time accounting values */ - __u64 sync_enter_timer; /* 0x0250 */ - __u64 async_enter_timer; /* 0x0258 */ - __u64 mcck_enter_timer; /* 0x0260 */ - __u64 exit_timer; /* 0x0268 */ - __u64 user_timer; /* 0x0270 */ - __u64 system_timer; /* 0x0278 */ - __u64 steal_timer; /* 0x0280 */ - __u64 last_update_timer; /* 0x0288 */ - __u64 last_update_clock; /* 0x0290 */ + __u64 sync_enter_timer; /* 0x0258 */ + __u64 async_enter_timer; /* 0x0260 */ + __u64 mcck_enter_timer; /* 0x0268 */ + __u64 exit_timer; /* 0x0270 */ + __u64 user_timer; /* 0x0278 */ + __u64 system_timer; /* 0x0280 */ + __u64 steal_timer; /* 0x0288 */ + __u64 last_update_timer; /* 0x0290 */ + __u64 last_update_clock; /* 0x0298 */ /* Current process. */ - __u32 current_task; /* 0x0298 */ - __u32 thread_info; /* 0x029c */ - __u32 kernel_stack; /* 0x02a0 */ + __u32 current_task; /* 0x02a0 */ + __u32 thread_info; /* 0x02a4 */ + __u32 kernel_stack; /* 0x02a8 */ /* Interrupt and panic stack. */ - __u32 async_stack; /* 0x02a4 */ - __u32 panic_stack; /* 0x02a8 */ + __u32 async_stack; /* 0x02ac */ + __u32 panic_stack; /* 0x02b0 */ /* Address space pointer. */ - __u32 kernel_asce; /* 0x02ac */ - __u32 user_asce; /* 0x02b0 */ - __u32 current_pid; /* 0x02b4 */ + __u32 kernel_asce; /* 0x02b4 */ + __u32 user_asce; /* 0x02b8 */ + __u32 current_pid; /* 0x02bc */ /* SMP info area */ - __u32 cpu_nr; /* 0x02b8 */ - __u32 softirq_pending; /* 0x02bc */ - __u32 percpu_offset; /* 0x02c0 */ - __u32 ext_call_fast; /* 0x02c4 */ - __u64 int_clock; /* 0x02c8 */ - __u64 mcck_clock; /* 0x02d0 */ - __u64 clock_comparator; /* 0x02d8 */ - __u32 machine_flags; /* 0x02e0 */ - __u32 ftrace_func; /* 0x02e4 */ - __u8 pad_0x02e8[0x0300-0x02e8]; /* 0x02e8 */ + __u32 cpu_nr; /* 0x02c0 */ + __u32 softirq_pending; /* 0x02c4 */ + __u32 percpu_offset; /* 0x02c8 */ + __u32 ext_call_fast; /* 0x02cc */ + __u64 int_clock; /* 0x02d0 */ + __u64 mcck_clock; /* 0x02d8 */ + __u64 clock_comparator; /* 0x02e0 */ + __u32 machine_flags; /* 0x02e8 */ + __u32 ftrace_func; /* 0x02ec */ + __u8 pad_0x02f8[0x0300-0x02f0]; /* 0x02f0 */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -151,10 +156,8 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e08 */ - __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */ + __u32 vmcore_info; /* 0x0e08 */ + __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -231,57 +234,62 @@ struct _lowcore { psw_t mcck_new_psw; /* 0x01e0 */ psw_t io_new_psw; /* 0x01f0 */ - /* Entry/exit save area & return psws. */ - __u64 save_area[16]; /* 0x0200 */ - psw_t return_psw; /* 0x0280 */ - psw_t return_mcck_psw; /* 0x0290 */ + /* Save areas. */ + __u64 save_area_sync[8]; /* 0x0200 */ + __u64 save_area_async[8]; /* 0x0240 */ + __u64 save_area_restart[1]; /* 0x0280 */ + __u8 pad_0x0288[0x0290-0x0288]; /* 0x0288 */ + + /* Return psws. */ + psw_t return_psw; /* 0x0290 */ + psw_t return_mcck_psw; /* 0x02a0 */ /* CPU accounting and timing values. */ - __u64 sync_enter_timer; /* 0x02a0 */ - __u64 async_enter_timer; /* 0x02a8 */ - __u64 mcck_enter_timer; /* 0x02b0 */ - __u64 exit_timer; /* 0x02b8 */ - __u64 user_timer; /* 0x02c0 */ - __u64 system_timer; /* 0x02c8 */ - __u64 steal_timer; /* 0x02d0 */ - __u64 last_update_timer; /* 0x02d8 */ - __u64 last_update_clock; /* 0x02e0 */ + __u64 sync_enter_timer; /* 0x02b0 */ + __u64 async_enter_timer; /* 0x02b8 */ + __u64 mcck_enter_timer; /* 0x02c0 */ + __u64 exit_timer; /* 0x02c8 */ + __u64 user_timer; /* 0x02d0 */ + __u64 system_timer; /* 0x02d8 */ + __u64 steal_timer; /* 0x02e0 */ + __u64 last_update_timer; /* 0x02e8 */ + __u64 last_update_clock; /* 0x02f0 */ /* Current process. */ - __u64 current_task; /* 0x02e8 */ - __u64 thread_info; /* 0x02f0 */ - __u64 kernel_stack; /* 0x02f8 */ + __u64 current_task; /* 0x02f8 */ + __u64 thread_info; /* 0x0300 */ + __u64 kernel_stack; /* 0x0308 */ /* Interrupt and panic stack. */ - __u64 async_stack; /* 0x0300 */ - __u64 panic_stack; /* 0x0308 */ + __u64 async_stack; /* 0x0310 */ + __u64 panic_stack; /* 0x0318 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0310 */ - __u64 user_asce; /* 0x0318 */ - __u64 current_pid; /* 0x0320 */ + __u64 kernel_asce; /* 0x0320 */ + __u64 user_asce; /* 0x0328 */ + __u64 current_pid; /* 0x0330 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0328 */ - __u32 softirq_pending; /* 0x032c */ - __u64 percpu_offset; /* 0x0330 */ - __u64 ext_call_fast; /* 0x0338 */ - __u64 int_clock; /* 0x0340 */ - __u64 mcck_clock; /* 0x0348 */ - __u64 clock_comparator; /* 0x0350 */ - __u64 vdso_per_cpu_data; /* 0x0358 */ - __u64 machine_flags; /* 0x0360 */ - __u64 ftrace_func; /* 0x0368 */ - __u64 gmap; /* 0x0370 */ - __u64 cmf_hpp; /* 0x0378 */ + __u32 cpu_nr; /* 0x0338 */ + __u32 softirq_pending; /* 0x033c */ + __u64 percpu_offset; /* 0x0340 */ + __u64 ext_call_fast; /* 0x0348 */ + __u64 int_clock; /* 0x0350 */ + __u64 mcck_clock; /* 0x0358 */ + __u64 clock_comparator; /* 0x0360 */ + __u64 vdso_per_cpu_data; /* 0x0368 */ + __u64 machine_flags; /* 0x0370 */ + __u64 ftrace_func; /* 0x0378 */ + __u64 gmap; /* 0x0380 */ + __u8 pad_0x0388[0x0400-0x0388]; /* 0x0388 */ /* Interrupt response block. */ - __u8 irb[64]; /* 0x0380 */ + __u8 irb[64]; /* 0x0400 */ /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x03c0 */ + __u32 paste[16]; /* 0x0440 */ - __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */ + __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -290,9 +298,7 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e0c */ + __u64 vmcore_info; /* 0x0e0c */ __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index accb372ddc7..f7ec548c2b9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,6 +177,7 @@ static inline int page_test_and_clear_young(unsigned long pfn) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 5325c89a584..0fbd1899c7b 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -19,7 +19,7 @@ #define ARCH_NEEDS_WEAK_PER_CPU #endif -#define arch_irqsafe_cpu_to_op(pcp, val, op) \ +#define arch_this_cpu_to_op(pcp, val, op) \ do { \ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ old__, new__, prev__; \ @@ -41,27 +41,27 @@ do { \ preempt_enable(); \ } while (0) -#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) -#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) -#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) -#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^) -#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) \ +#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ ({ \ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ ret__; \ @@ -79,10 +79,10 @@ do { \ ret__; \ }) -#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) -#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) -#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) -#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) #include <asm-generic/percpu.h> diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c0cb794bb36..011358c1b18 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -128,28 +128,11 @@ static inline int is_zero_pfn(unsigned long pfn) * effect, this also makes sure that 64 bit module code cannot be used * as system call address. */ - extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; +extern struct page *vmemmap; -#ifndef __s390x__ -#define VMALLOC_SIZE (96UL << 20) -#define VMALLOC_END 0x7e000000UL -#define VMEM_MAP_END 0x80000000UL -#else /* __s390x__ */ -#define VMALLOC_SIZE (128UL << 30) -#define VMALLOC_END 0x3e000000000UL -#define VMEM_MAP_END 0x40000000000UL -#endif /* __s390x__ */ - -/* - * VMEM_MAX_PHYS is the highest physical address that can be added to the 1:1 - * mapping. This needs to be calculated at compile time since the size of the - * VMEM_MAP is static but the size of struct page can change. - */ -#define VMEM_MAX_PAGES ((VMEM_MAP_END - VMALLOC_END) / sizeof(struct page)) -#define VMEM_MAX_PFN min(VMALLOC_START >> PAGE_SHIFT, VMEM_MAX_PAGES) -#define VMEM_MAX_PHYS ((VMEM_MAX_PFN << PAGE_SHIFT) & ~((16 << 20) - 1)) -#define vmemmap ((struct page *) VMALLOC_END) +#define VMEM_MAX_PHYS ((unsigned long) vmemmap) /* * A 31 bit pagetable entry of S390 has following format: @@ -593,14 +576,16 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) unsigned long address, bits; unsigned char skey; + if (!pte_present(*ptep)) + return pgste; address = pte_val(*ptep) & PAGE_MASK; skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ - if (bits) { - skey ^= bits; - page_set_storage_key(address, skey, 1); - } + if (bits & _PAGE_CHANGED) + page_set_storage_key(address, skey ^ bits, 1); + else if (bits) + page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ /* Get host changed & referenced bits from pgste */ @@ -625,6 +610,8 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) #ifdef CONFIG_PGSTE int young; + if (!pte_present(*ptep)) + return pgste; young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); /* Transfer page referenced bit to pte software bit (host view) */ if (young || (pgste_val(pgste) & RCP_HR_BIT)) @@ -638,13 +625,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) } -static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) { #ifdef CONFIG_PGSTE unsigned long address; unsigned long okey, nkey; - address = pte_val(*ptep) & PAGE_MASK; + if (!pte_present(entry)) + return; + address = pte_val(entry) & PAGE_MASK; okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection bit from pgste */ @@ -696,7 +685,9 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long length); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); /* * Certain architectures need to do special things when PTEs @@ -710,7 +701,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_set_pte(ptep, pgste); + pgste_set_pte(ptep, pgste, entry); *ptep = entry; pgste_set_unlock(ptep, pgste); } else diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index a4b6229e5d4..27272f6a14c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -33,6 +33,8 @@ static inline void get_cpu_id(struct cpuid *ptr) extern void s390_adjust_jiffies(void); extern int get_cpu_capability(unsigned int *); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. @@ -78,8 +80,6 @@ struct thread_struct { unsigned int acrs[NUM_ACRS]; unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; - unsigned long prot_addr; /* address of protection-excep. */ - unsigned int trap_no; unsigned long gmap_addr; /* address of last gmap fault. */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ @@ -118,17 +118,17 @@ struct stack_frame { /* * Do necessary setup to start up a new thread. */ -#define start_thread(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ } while (0) -#define start_thread31(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user32_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ } while (0) /* Forward declaration, a strange C thing */ @@ -187,7 +187,6 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ - static inline void __load_psw_mask (unsigned long mask) { unsigned long addr; @@ -212,26 +211,37 @@ static inline void __load_psw_mask (unsigned long mask) : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); #endif /* __s390x__ */ } - + /* - * Function to stop a processor until an interruption occurred + * Rewind PSW instruction address by specified number of bytes. */ -static inline void enabled_wait(void) +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) { - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | - PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY); -} +#ifndef __s390x__ + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} + /* * Function to drop a processor into disabled wait state */ - static inline void ATTRIB_NORET disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; - dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; dw_psw.addr = code; /* * Store status and then load disabled wait psw, diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62fd80c9e98..56da355678f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -230,17 +230,21 @@ typedef struct #define PSW_MASK_IO 0x02000000UL #define PSW_MASK_EXT 0x01000000UL #define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ #define PSW_MASK_MCHECK 0x00040000UL #define PSW_MASK_WAIT 0x00020000UL #define PSW_MASK_PSTATE 0x00010000UL #define PSW_MASK_ASC 0x0000C000UL #define PSW_MASK_CC 0x00003000UL #define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL + +#define PSW_MASK_USER 0x00003F00UL #define PSW_ADDR_AMODE 0x80000000UL #define PSW_ADDR_INSN 0x7FFFFFFFUL -#define PSW_BASE_BITS 0x00080000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) #define PSW_ASC_PRIMARY 0x00000000UL @@ -254,6 +258,7 @@ typedef struct #define PSW_MASK_DAT 0x0400000000000000UL #define PSW_MASK_IO 0x0200000000000000UL #define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL #define PSW_MASK_KEY 0x00F0000000000000UL #define PSW_MASK_MCHECK 0x0004000000000000UL #define PSW_MASK_WAIT 0x0002000000000000UL @@ -261,12 +266,14 @@ typedef struct #define PSW_MASK_ASC 0x0000C00000000000UL #define PSW_MASK_CC 0x0000300000000000UL #define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL + +#define PSW_MASK_USER 0x00003F0180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL -#define PSW_BASE_BITS 0x0000000180000000UL -#define PSW_BASE32_BITS 0x0000000080000000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) #define PSW_ASC_PRIMARY 0x0000000000000000UL @@ -279,18 +286,7 @@ typedef struct #ifdef __KERNEL__ extern long psw_kernel_bits; extern long psw_user_bits; -#ifdef CONFIG_64BIT -extern long psw_user32_bits; #endif -#endif - -/* This macro merges a NEW PSW mask specified by the user into - the currently active PSW mask CURRENT, modifying only those - bits in CURRENT that the user may be allowed to change: this - is the condition code and the program mask bits. */ -#define PSW_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW_MASK_CC|PSW_MASK_PM)) | \ - ((NEW) & (PSW_MASK_CC|PSW_MASK_PM))) /* * The s390_regs structure is used to define the elf_gregset_t. @@ -328,8 +324,8 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short ilc; - unsigned short svcnr; + unsigned int int_code; + unsigned long int_parm_long; }; /* @@ -487,6 +483,8 @@ typedef struct #define PTRACE_POKETEXT_AREA 0x5004 #define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 15c97625df8..d75c8e78f7e 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -46,6 +46,8 @@ struct qdesfmt0 { u32 : 16; } __attribute__ ((packed)); +#define QDR_AC_MULTI_BUFFER_ENABLE 0x01 + /** * struct qdr - queue description record (QDR) * @qfmt: queue format @@ -123,6 +125,40 @@ struct slibe { }; /** + * struct qaob - queue asynchronous operation block + * @res0: reserved parameters + * @res1: reserved parameter + * @res2: reserved parameter + * @res3: reserved parameter + * @aorc: asynchronous operation return code + * @flags: internal flags + * @cbtbs: control block type + * @sb_count: number of storage blocks + * @sba: storage block element addresses + * @dcount: size of storage block elements + * @user0: user defineable value + * @res4: reserved paramater + * @user1: user defineable value + * @user2: user defineable value + */ +struct qaob { + u64 res0[6]; + u8 res1; + u8 res2; + u8 res3; + u8 aorc; + u8 flags; + u16 cbtbs; + u8 sb_count; + u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u64 user0; + u64 res4[2]; + u64 user1; + u64 user2; +} __attribute__ ((packed, aligned(256))); + +/** * struct slib - storage list information block (SLIB) * @nsliba: next SLIB address (if any) * @sla: SL address @@ -222,9 +258,46 @@ struct slsb { u8 val[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(256))); +#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 +#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 +/** + * struct qdio_outbuf_state - SBAL related asynchronous operation information + * (for communication with upper layer programs) + * (only required for use with completion queues) + * @flags: flags indicating state of buffer + * @aob: pointer to QAOB used for the particular SBAL + * @user: pointer to upper layer program's state information related to SBAL + * (stored in user1 data of QAOB) + */ +struct qdio_outbuf_state { + u8 flags; + struct qaob *aob; + void *user; +}; + +#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 +#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 + +#define CHSC_AC1_INITIATE_INPUTQ 0x80 + + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ + +#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_DATA_DIV_ENABLED 0x0002 + +#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 + struct qdio_ssqd_desc { u8 flags; u8:8; @@ -243,8 +316,7 @@ struct qdio_ssqd_desc { u64 sch_token; u8 mro; u8 mri; - u8:8; - u8 sbalic; + u16 qdioac3; u16:16; u8:8; u8 mmwc; @@ -280,13 +352,16 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues + * @queue_start_poll_array: polling handlers (one per input queue or NULL) * @int_parm: interruption parameter * @input_sbal_addr_array: address of no_input_qs * 128 pointers * @output_sbal_addr_array: address of no_output_qs * 128 pointers + * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) */ struct qdio_initialize { struct ccw_device *cdev; unsigned char q_format; + unsigned char qdr_ac; unsigned char adapter_name[8]; unsigned int qib_param_field_format; unsigned char *qib_param_field; @@ -297,11 +372,13 @@ struct qdio_initialize { unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; - void (*queue_start_poll) (struct ccw_device *, int, unsigned long); + void (**queue_start_poll_array) (struct ccw_device *, int, + unsigned long); int scan_threshold; unsigned long int_parm; void **input_sbal_addr_array; void **output_sbal_addr_array; + struct qdio_outbuf_state *output_sbal_state_array; }; #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ @@ -316,6 +393,7 @@ struct qdio_initialize { extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); +extern void qdio_release_aob(struct qaob *); extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, unsigned int); extern int qdio_start_irq(struct ccw_device *, int); diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index f584f4a5258..3d6ad4ad2a3 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -17,5 +17,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void); +extern void s390_reset_system(void (*func)(void *), void *data); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index d5e2ef10537..097183c7040 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -26,15 +26,21 @@ #define IPL_DEVICE (*(unsigned long *) (0x10404)) #define INITRD_START (*(unsigned long *) (0x1040C)) #define INITRD_SIZE (*(unsigned long *) (0x10414)) +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) #else /* __s390x__ */ #define IPL_DEVICE (*(unsigned long *) (0x10400)) #define INITRD_START (*(unsigned long *) (0x10408)) #define INITRD_SIZE (*(unsigned long *) (0x10410)) +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) #endif /* __s390x__ */ #define COMMAND_LINE ((char *) (0x10480)) #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 +#define CHUNK_OLDMEM 4 +#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -48,6 +54,8 @@ extern int memory_end_set; extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); +void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, + unsigned long size, int type); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 @@ -74,6 +82,7 @@ extern unsigned int user_mode; #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_STCKF (1UL << 15) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -92,6 +101,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_STCKF (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -103,9 +113,11 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_STCKF (S390_lowcore.machine_flags & MACHINE_FLAG_STCKF) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) +#define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) /* * Console mode. Override with conmode= @@ -134,10 +146,14 @@ extern char kernel_nss_name[]; #define IPL_DEVICE 0x10404 #define INITRD_START 0x1040C #define INITRD_SIZE 0x10414 +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 #else /* __s390x__ */ #define IPL_DEVICE 0x10400 #define INITRD_START 0x10408 #define INITRD_SIZE 0x10410 +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 #endif /* __s390x__ */ #define COMMAND_LINE 0x10480 diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h index 0addc6466d9..ca3f8814e36 100644 --- a/arch/s390/include/asm/sfp-util.h +++ b/arch/s390/include/asm/sfp-util.h @@ -72,6 +72,6 @@ extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, #define UDIV_NEEDS_NORMALIZATION 0 -#define abort() return 0 +#define abort() BUG() #define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index e3bffd4e2d6..7040b8567cd 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -56,6 +56,7 @@ enum { ec_schedule = 0, ec_call_function, ec_call_function_single, + ec_stop_cpu, }; /* diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 045e009fc16..c32e9123b40 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -23,7 +23,6 @@ extern void __cpu_die (unsigned int cpu); extern int __cpu_up (unsigned int cpu); extern struct mutex smp_cpu_state_mutex; -extern int smp_cpu_polarization[]; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); @@ -33,6 +32,7 @@ extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, int from, int to); +extern void smp_restart_with_online_cpu(void); extern void smp_restart_cpu(void); /* @@ -64,6 +64,10 @@ static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) func(data); } +static inline void smp_restart_with_online_cpu(void) +{ +} + #define smp_vcpu_scheduled (1) #endif /* CONFIG_SMP */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index fdff1e995c7..67b5c1b14b5 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -70,4 +70,7 @@ #define SO_RXQ_OVFL 40 +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h index 545d219e6a2..0fb34027d3f 100644 --- a/arch/s390/include/asm/sparsemem.h +++ b/arch/s390/include/asm/sparsemem.h @@ -4,8 +4,8 @@ #ifdef CONFIG_64BIT #define SECTION_SIZE_BITS 28 -#define MAX_PHYSADDR_BITS 42 -#define MAX_PHYSMEM_BITS 42 +#define MAX_PHYSADDR_BITS 46 +#define MAX_PHYSMEM_BITS 46 #else diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 56612fc8186..fd94dfec8d0 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include <linux/smp.h> +extern int spin_retry; + static inline int _raw_compare_and_swap(volatile unsigned int *lock, unsigned int old, unsigned int new) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 5c0246b955d..fb214dd9b7e 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -13,6 +13,7 @@ #define _ASM_SYSCALL_H 1 #include <linux/sched.h> +#include <linux/err.h> #include <asm/ptrace.h> /* @@ -25,7 +26,8 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svcnr ? regs->svcnr : -1; + return test_tsk_thread_flag(task, TIF_SYSCALL) ? + (regs->int_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -37,7 +39,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; + return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 6582f69f238..d73cc6b6000 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -114,6 +114,8 @@ extern void pfault_fini(void); extern void cmma_init(void); extern int memcpy_real(void *, void *, size_t); extern void copy_to_absolute_zero(void *dest, void *src, size_t count); +extern int copy_to_user_real(void __user *dest, void *src, size_t count); +extern int copy_from_user_real(void *dest, void __user *src, size_t count); #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ @@ -210,8 +212,10 @@ __set_psw_mask(unsigned long mask) __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); } -#define local_mcck_enable() __set_psw_mask(psw_kernel_bits) -#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK) +#define local_mcck_enable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) #ifdef CONFIG_SMP diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 1a5dbb6f149..a73038155e0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + unsigned int system_call; __u64 user_timer; __u64 system_timer; unsigned long last_break; /* last breaking-event-address. */ @@ -84,10 +85,10 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ +#define TIF_SYSCALL 0 /* inside a system call */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_RESTART_SVC 4 /* restart svc with new svc number */ #define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ @@ -101,13 +102,12 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 20 /* This task is single stepped */ -#define TIF_FREEZE 21 /* thread is freezing for suspend */ +#define _TIF_SYSCALL (1<<TIF_SYSCALL) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_RESTART_SVC (1<<TIF_RESTART_SVC) #define _TIF_PER_TRAP (1<<TIF_PER_TRAP) #define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING) #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -117,8 +117,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIE (1<<TIF_SIE) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) -#define _TIF_SINGLE_STEP (1<<TIF_FREEZE) -#define _TIF_FREEZE (1<<TIF_FREEZE) +#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) #ifdef CONFIG_64BIT #define is_32bit_task() (test_thread_flag(TIF_31BIT)) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a40af6..c447a27a7fd 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -86,6 +86,17 @@ static inline void get_clock_ext(char *clk) asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); } +static inline unsigned long long get_clock_fast(void) +{ + unsigned long long clk; + + if (MACHINE_HAS_STCKF) + asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); + else + clk = get_clock(); + return clk; +} + static inline unsigned long long get_clock_xt(void) { unsigned char clk[16]; diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 30444538238..1d8648cf2fe 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -59,6 +59,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) } #else #define __tlb_flush_full(mm) __tlb_flush_local() +#define __tlb_flush_global() __tlb_flush_local() #endif /* diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 005d77d8ae2..0837de80c35 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -4,6 +4,10 @@ #include <linux/cpumask.h> #include <asm/sysinfo.h> +struct cpu; + +#ifdef CONFIG_SCHED_BOOK + extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; @@ -16,8 +20,6 @@ static inline const struct cpumask *cpu_coregroup_mask(int cpu) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define mc_capable() (1) -#ifdef CONFIG_SCHED_BOOK - extern unsigned char cpu_book_id[NR_CPUS]; extern cpumask_t cpu_book_map[NR_CPUS]; @@ -29,19 +31,45 @@ static inline const struct cpumask *cpu_book_mask(int cpu) #define topology_book_id(cpu) (cpu_book_id[cpu]) #define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) -#endif /* CONFIG_SCHED_BOOK */ - +int topology_cpu_init(struct cpu *); int topology_set_cpu_management(int fc); void topology_schedule_update(void); void store_topology(struct sysinfo_15_1_x *info); +void topology_expect_change(void); + +#else /* CONFIG_SCHED_BOOK */ + +static inline void topology_schedule_update(void) { } +static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline void topology_expect_change(void) { } -#define POLARIZATION_UNKNWN (-1) +#endif /* CONFIG_SCHED_BOOK */ + +#define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) #define POLARIZATION_VL (1) #define POLARIZATION_VM (2) #define POLARIZATION_VH (3) -#ifdef CONFIG_SMP +extern int cpu_polarization[]; + +static inline void cpu_set_polarization(int cpu, int val) +{ +#ifdef CONFIG_SCHED_BOOK + cpu_polarization[cpu] = val; +#endif +} + +static inline int cpu_read_polarization(int cpu) +{ +#ifdef CONFIG_SCHED_BOOK + return cpu_polarization[cpu]; +#else + return POLARIZATION_HRZ; +#endif +} + +#ifdef CONFIG_SCHED_BOOK void s390_init_cpu_topology(void); #else static inline void s390_init_cpu_topology(void) diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h index eeb52ccf499..05ebbcdbbf6 100644 --- a/arch/s390/include/asm/types.h +++ b/arch/s390/include/asm/types.h @@ -13,8 +13,6 @@ #ifndef __ASSEMBLY__ -typedef unsigned short umode_t; - /* A address type so that arithmetic can be done on it & it can be upgraded to 64 bit when necessary */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 404bdb9671b..8a8008fe7b8 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -277,7 +277,9 @@ #define __NR_clock_adjtime 337 #define __NR_syncfs 338 #define __NR_setns 339 -#define NR_syscalls 340 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define NR_syscalls 342 /* * There are some system calls that are not present on 64 bit, some @@ -396,6 +398,7 @@ #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index df3732249ba..7d9ec924e7e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -32,7 +32,8 @@ extra-y += head.o init_task.o vmlinux.lds extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o -obj-$(CONFIG_SMP) += smp.o topology.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SCHED_BOOK) += topology.o obj-$(CONFIG_SMP) += $(if $(CONFIG_64BIT),switch_cpu64.o, \ switch_cpu.o) obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o @@ -48,6 +49,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 2b45591e158..6e6a72e66d6 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -45,8 +45,8 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); - DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); @@ -109,7 +109,9 @@ int main(void) DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); - DEFINE(__LC_SAVE_AREA, offsetof(struct _lowcore, save_area)); + DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); + DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); + DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); @@ -141,7 +143,6 @@ int main(void) DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); - DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64)); #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ @@ -152,7 +153,6 @@ int main(void) DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); - DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); #endif /* CONFIG_32BIT */ return 0; diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 255435663bf..3aa4d00aaf5 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -33,7 +33,7 @@ s390_base_mcck_handler_fn: .previous ENTRY(s390_base_ext_handler) - stmg %r0,%r15,__LC_SAVE_AREA + stmg %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn @@ -41,7 +41,7 @@ ENTRY(s390_base_ext_handler) ltgr %r1,%r1 jz 1f basr %r14,%r1 -1: lmg %r0,%r15,__LC_SAVE_AREA +1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW @@ -53,7 +53,7 @@ s390_base_ext_handler_fn: .previous ENTRY(s390_base_pgm_handler) - stmg %r0,%r15,__LC_SAVE_AREA + stmg %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn @@ -61,7 +61,7 @@ ENTRY(s390_base_pgm_handler) ltgr %r1,%r1 jz 1f basr %r14,%r1 - lmg %r0,%r15,__LC_SAVE_AREA + lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -86,6 +86,8 @@ s390_base_pgm_handler_fn: ENTRY(diag308_reset) larl %r4,.Lctlregs # Save control registers stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 lghi %r3,0 lg %r4,0(%r4) # Save PSW @@ -99,6 +101,8 @@ ENTRY(diag308_reset) sam64 # Switch to 64 bit addressing mode larl %r4,.Lctlregs # Restore control registers lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) br %r14 .align 16 .Lrestart_psw: @@ -110,6 +114,8 @@ ENTRY(diag308_reset) .rept 16 .quad 0 .endr +.Lfpctl: + .long 0 .previous #else /* CONFIG_64BIT */ @@ -136,7 +142,7 @@ s390_base_mcck_handler_fn: .previous ENTRY(s390_base_ext_handler) - stm %r0,%r15,__LC_SAVE_AREA + stm %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -144,7 +150,7 @@ ENTRY(s390_base_ext_handler) ltr %r1,%r1 jz 1f basr %r14,%r1 -1: lm %r0,%r15,__LC_SAVE_AREA +1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpsw __LC_EXT_OLD_PSW @@ -158,7 +164,7 @@ s390_base_ext_handler_fn: .previous ENTRY(s390_base_pgm_handler) - stm %r0,%r15,__LC_SAVE_AREA + stm %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -166,7 +172,7 @@ ENTRY(s390_base_pgm_handler) ltr %r1,%r1 jz 1f basr %r14,%r1 - lm %r0,%r15,__LC_SAVE_AREA + lm %r0,%r15,__LC_SAVE_AREA_SYNC lpsw __LC_PGM_OLD_PSW 1: lpsw disabled_wait_psw-0b(%r13) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa86dd9..ab64bdbab2a 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -60,12 +60,9 @@ #include "compat_linux.h" -long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); -long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE); +u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE | PSW32_ASC_HOME; /* For this source file, we want overflow handling. */ @@ -281,9 +278,6 @@ asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - - call &= 0xffff; - switch (call) { case SEMTIMEDOP: return compat_sys_semtimedop(first, compat_ptr(ptr), @@ -365,12 +359,7 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, if (set) { if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } + s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } set_fs (KERNEL_DS); ret = sys_rt_sigprocmask(how, @@ -380,12 +369,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, set_fs (old_fs); if (ret) return ret; if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } @@ -404,12 +389,8 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); set_fs (old_fs); if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a9a285b8c4a..6fe78c2f95d 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -141,7 +141,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -213,16 +214,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = get_user(sa_handler, &act->sa_handler); ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } + new_ka.sa.sa_mask.sig[0] = + set32.sig[0] | (((long)set32.sig[1]) << 32); ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) @@ -233,20 +226,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); @@ -300,9 +281,10 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) _s390_regs_common32 regs32; int err, i; - regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; + regs32.psw.mask = psw32_user_bits | + ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); + regs32.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) regs32.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); @@ -327,8 +309,9 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; @@ -342,7 +325,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) return err; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -496,11 +479,11 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -509,19 +492,24 @@ static int setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + } /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; @@ -560,24 +548,25 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + (u16 __force __user *)(frame->retcode)); } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; + regs->gprs[15] = (__force __u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; return 0; give_sigsegv: diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 7526db6bf50..18c51df9fe0 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1623,8 +1623,27 @@ ENTRY(sys_syncfs_wrapper) lgfr %r2,%r2 # int jg sys_syncfs - .globl sys_setns_wrapper -sys_setns_wrapper: +ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns + +ENTRY(compat_sys_process_vm_readv_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_readv + +ENTRY(compat_sys_process_vm_writev_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_writev diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 00000000000..39f8fd4438f --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,426 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <linux/elf.h> +#include <asm/ipl.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +/* + * Copy one page from "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + unsigned long src; + + if (!csize) + return 0; + + src = (pfn << PAGE_SHIFT) + offset; + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + copy_to_user_real((void __force __user *) buf, (void *) src, + csize); + else + memcpy_real(buf, (void *) src, csize); + return csize; +} + +/* + * Copy memory from old kernel + */ +static int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + return memcpy_real(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Get memory layout and create hole for oldmem + */ +static struct mem_chunk *get_memory_layout(void) +{ + struct mem_chunk *chunk_array; + + chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); + detect_memory_layout(chunk_array); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + return chunk_array; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return ptr; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return ptr; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return ptr; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return ptr; + vmcoreinfo = kzalloc_panic(note.n_descsz + 1); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return ptr; + vmcoreinfo[note.n_descsz + 1] = 0; + return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; zfcpdump_save_areas[i]; i++) { + if (zfcpdump_save_areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i, cnt = 0; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + if (mem_chunk->size == 0) + continue; + cnt++; + } + kfree(chunk_array); + return cnt; +} + +/* + * Relocate pointer in order to allow vmcore code access the data + */ +static inline unsigned long relocate(unsigned long addr) +{ + return OLDMEM_BASE + addr; +} + +/* + * Initialize ELF loads (new kernel) + */ +static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (mem_chunk->size == 0) + break; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + else + phdr->p_filesz = mem_chunk->size; + phdr->p_type = PT_LOAD; + phdr->p_offset = mem_chunk->addr; + phdr->p_vaddr = mem_chunk->addr; + phdr->p_paddr = mem_chunk->addr; + phdr->p_memsz = mem_chunk->size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } + kfree(chunk_array); + return i; +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; zfcpdump_save_areas[i]; i++) { + sa = zfcpdump_save_areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = relocate(notes_offset); + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); + *elfcorebuf_sz = hdr_off; + *elfcorebuf = (void *) relocate((unsigned long) hdr); + BUG_ON(*elfcorebuf_sz > alloc_size); +} + +/* + * Create kdump ELF core header in new kernel, if it has not been passed via + * the "elfcorehdr" kernel parameter + */ +static int setup_kdump_elfcorehdr(void) +{ + size_t elfcorebuf_sz; + char *elfcorebuf; + + if (!OLDMEM_BASE || is_kdump_kernel()) + return -EINVAL; + s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); + elfcorehdr_addr = (unsigned long long) elfcorebuf; + elfcorehdr_size = elfcorebuf_sz; + return 0; +} + +subsys_initcall(setup_kdump_elfcorehdr); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 5ad6bc078bf..6848828b962 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -74,7 +74,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); static debug_info_t *debug_info_create(const char *name, int pages_per_area, - int nr_areas, int buf_size, mode_t mode); + int nr_areas, int buf_size, umode_t mode); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t * id, @@ -330,7 +330,7 @@ debug_info_free(debug_info_t* db_info){ static debug_info_t* debug_info_create(const char *name, int pages_per_area, int nr_areas, - int buf_size, mode_t mode) + int buf_size, umode_t mode) { debug_info_t* rc; @@ -688,7 +688,7 @@ debug_close(struct inode *inode, struct file *file) */ debug_info_t *debug_register_mode(const char *name, int pages_per_area, - int nr_areas, int buf_size, mode_t mode, + int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid) { debug_info_t *rc = NULL; @@ -1090,7 +1090,7 @@ debug_register_view(debug_info_t * id, struct debug_view *view) int rc = 0; int i; unsigned long flags; - mode_t mode; + umode_t mode; struct dentry *pde; if (!id) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 45df6d456aa..e2f847599c8 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1578,10 +1578,15 @@ void show_code(struct pt_regs *regs) ptr += sprintf(ptr, "%s Code:", mode); hops = 0; while (start < end && hops < 8) { - *ptr++ = (start == 32) ? '>' : ' '; + opsize = insn_length(code[start]); + if (start + opsize == 32) + *ptr++ = '#'; + else if (start == 32) + *ptr++ = '>'; + else + *ptr++ = ' '; addr = regs->psw.addr + start - 32; ptr += sprintf(ptr, ONELONG, addr); - opsize = insn_length(code[start]); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f297456dba7..52098d6dfaa 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,7 +252,7 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; @@ -390,6 +390,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + if (test_facility(25)) + S390_lowcore.machine_flags |= MACHINE_FLAG_STCKF; #endif } @@ -432,18 +434,22 @@ static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) } } -static void __init setup_boot_command_line(void) +static inline int has_ebcdic_char(const char *str) { int i; - /* convert arch command line to ascii */ - for (i = 0; i < ARCH_COMMAND_LINE_SIZE; i++) - if (COMMAND_LINE[i] & 0x80) - break; - if (i < ARCH_COMMAND_LINE_SIZE) - EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); - COMMAND_LINE[ARCH_COMMAND_LINE_SIZE-1] = 0; + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; +} +static void __init setup_boot_command_line(void) +{ + COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(COMMAND_LINE)) + EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); /* copy arch command line */ strlcpy(boot_command_line, strstrip(COMMAND_LINE), ARCH_COMMAND_LINE_SIZE); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 02ec8fe7d03..3705700ed37 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -19,173 +19,120 @@ #include <asm/unistd.h> #include <asm/page.h> -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 4 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 12 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 20 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 28 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 36 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 44 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 4 +__PT_R2 = __PT_GPRS + 8 +__PT_R3 = __PT_GPRS + 12 +__PT_R4 = __PT_GPRS + 16 +__PT_R5 = __PT_GPRS + 20 +__PT_R6 = __PT_GPRS + 24 +__PT_R7 = __PT_GPRS + 28 +__PT_R8 = __PT_GPRS + 32 +__PT_R9 = __PT_GPRS + 36 +__PT_R10 = __PT_GPRS + 40 +__PT_R11 = __PT_GPRS + 44 +__PT_R12 = __PT_GPRS + 48 +__PT_R13 = __PT_GPRS + 524 +__PT_R14 = __PT_GPRS + 56 +__PT_R15 = __PT_GPRS + 60 _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT #define BASED(name) name-system_call(%r13) -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 - l %r1,BASED(.Ltrace_irq_on_caller) - basr %r14,%r1 + l %r1,BASED(.Lhardirqs_on) + basr %r14,%r1 # call trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 - l %r1,BASED(.Ltrace_irq_off_caller) - basr %r14,%r1 - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF + l %r1,BASED(.Lhardirqs_off) + basr %r14,%r1 # call trace_hardirqs_off_caller #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 l %r1,BASED(.Llockdep_sys_exit) - basr %r14,%r1 -0: - .endm -#else -#define LOCKDEP_SYS_EXIT + basr %r14,%r1 # call lockdep_sys_exit #endif - -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ - - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lm %r10,%r11,\lc_from - sl %r10,\lc_to - sl %r11,\lc_to+4 - bc 3,BASED(0f) - sl %r10,BASED(.Lc_1) -0: al %r10,\lc_sum - al %r11,\lc_sum+4 - bc 12,BASED(1f) - al %r10,BASED(.Lc_1) -1: stm %r10,%r11,\lc_sum - .endm - - .macro SAVE_ALL_SVC psworg,savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 - l %r15,__LC_KERNEL_STACK # problem state -> load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw .endm - .macro SAVE_ALL_BASE savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 - .endm - - .macro SAVE_ALL_PGM psworg,savearea - tm \psworg+1,0x01 # test problem state bit + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - bnz BASED(1f) - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bnz BASED(2f) - la %r12,\psworg - b BASED(stack_overflow) -#else - bz BASED(2f) + tml %r15,\stacksize - CONFIG_STACK_GUARD + la %r14,\savearea + jz stack_overflow #endif -1: l %r15,__LC_KERNEL_STACK # problem state -> load ksp -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw .endm - .macro SAVE_ALL_ASYNC psworg,savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - bnz BASED(1f) # from user -> load async stack - clc \psworg+4(4),BASED(.Lcritical_end) - bhe BASED(0f) - clc \psworg+4(4),BASED(.Lcritical_start) - bl BASED(0f) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 - tm 1(%r12),0x01 # retest problem state after cleanup - bnz BASED(1f) -0: l %r14,__LC_ASYNC_STACK # are we already on the async stack ? + .macro SWITCH_ASYNC savearea,stack,shift + tmh %r8,0x0001 # interrupting from user ? + jnz 1f + lr %r14,%r9 + sl %r14,BASED(.Lcritical_start) + cl %r14,BASED(.Lcritical_length) + jhe 0f + la %r11,\savearea # inside critical section, do cleanup + bras %r14,cleanup_critical + tmh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: l %r14,\stack # are we already on the target stack? slr %r14,%r15 - sra %r14,STACK_SHIFT -#ifdef CONFIG_CHECK_STACK - bnz BASED(1f) - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bnz BASED(2f) - b BASED(stack_overflow) -#else - bz BASED(2f) -#endif -1: l %r15,__LC_ASYNC_STACK -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw + sra %r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + j 2f +1: l %r15,\stack # load target stack +2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro CREATE_STACK_FRAME savearea - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack + .macro ADD64 high,low,timer + al \high,\timer + al \low,\timer+4 + brc 12,.+8 + ahi \high,1 .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(8),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - stpt __LC_EXIT_TIMER - lpsw \psworg # back to caller + .macro SUB64 high,low,timer + sl \high,\timer + sl \low,\timer+4 + brc 3,.+8 + ahi \high,-1 + .endm + + .macro UPDATE_VTIME high,low,enter_timer + lm \high,\low,__LC_EXIT_TIMER + SUB64 \high,\low,\enter_timer + ADD64 \high,\low,__LC_USER_TIMER + stm \high,\low,__LC_USER_TIMER + lm \high,\low,__LC_LAST_UPDATE_TIMER + SUB64 \high,\low,__LC_EXIT_TIMER + ADD64 \high,\low,__LC_SYSTEM_TIMER + stm \high,\low,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm .macro REENABLE_IRQS - mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) - ni __SF_EMPTY(%r15),0xbf - ssm __SF_EMPTY(%r15) + st %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm .section .kprobes.text, "ax" @@ -198,14 +145,13 @@ STACK_SIZE = 1 << STACK_SHIFT * gpr2 = prev */ ENTRY(__switch_to) - basr %r1,0 -0: l %r4,__THREAD_info(%r2) # get thread_info of prev + l %r4,__THREAD_info(%r2) # get thread_info of prev l %r5,__THREAD_info(%r3) # get thread_info of next tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? - bz 1f-0b(%r1) + jz 0f ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next -1: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task +0: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task st %r15,__THREAD_ksp(%r2) # store kernel stack of prev l %r15,__THREAD_ksp(%r3) # load kernel stack of next lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 @@ -225,70 +171,72 @@ __critical_start: ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct +sysc_stm: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 +sysc_per: + l %r15,__LC_KERNEL_STACK + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: - xr %r7,%r7 - icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 - bnz BASED(sysc_nr_ok) # svc number > 0 + oi __TI_flags+3(%r12),_TIF_SYSCALL + lh %r8,__PT_INT_CODE+2(%r11) + sla %r8,2 # shift and test for svc0 + jnz sysc_nr_ok # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) - bnl BASED(sysc_nr_ok) - sth %r1,SP_SVCNR(%r15) - lr %r7,%r1 # copy svc number to %r7 + jnl sysc_nr_ok + sth %r1,__PT_INT_CODE+2(%r11) + lr %r8,%r1 + sla %r8,2 sysc_nr_ok: - sll %r7,2 # svc number *4 - l %r10,BASED(.Lsysc_table) - tm __TI_flags+2(%r12),_TIF_SYSCALL - mvc SP_ARGS(4,%r15),SP_R7(%r15) - l %r8,0(%r7,%r10) # get system call addr. - bnz BASED(sysc_tracesys) - basr %r14,%r8 # call sys_xxxx - st %r2,SP_R2(%r15) # store return value (change R2 on stack) + l %r10,BASED(.Lsys_call_table) # 31 bit system call table + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + st %r2,__PT_ORIG_GPR2(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r9,0(%r8,%r10) # get system call addr. + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + st %r2,__PT_R2(%r11) # store return value sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+3(%r12),_TIF_WORK_SVC - bnz BASED(sysc_work) # there is work to do (signals etc.) + jnz sysc_work # check for work + ni __TI_flags+3(%r12),255-_TIF_SYSCALL sysc_restore: - RESTORE_ALL __LC_RETURN_PSW,1 + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) - -# # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bo BASED(sysc_mcck_pending) + jo sysc_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bo BASED(sysc_reschedule) + jo sysc_reschedule tm __TI_flags+3(%r12),_TIF_SIGPENDING - bo BASED(sysc_sigpending) + jo sysc_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - bo BASED(sysc_notify_resume) - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) + jo sysc_notify_resume tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) # beware of critical section cleanup + jo sysc_singlestep + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule @@ -296,13 +244,13 @@ sysc_work_tif: sysc_reschedule: l %r1,BASED(.Lschedule) la %r14,BASED(sysc_return) - br %r1 # call scheduler + br %r1 # call schedule # # _TIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - l %r1,BASED(.Ls390_handle_mcck) + l %r1,BASED(.Lhandle_mcck) la %r14,BASED(sysc_return) br %r1 # TIF bit will be cleared by handler @@ -311,92 +259,84 @@ sysc_mcck_pending: # sysc_sigpending: ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) + tm __TI_flags+3(%r12),_TIF_SYSCALL + jno sysc_return + lm %r2,%r7,__PT_R2(%r11) # load svc arguments + xr %r8,%r8 # svc 0 returns -ENOSYS + clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) + jnl sysc_nr_ok # invalid svc number -> do svc 0 + lh %r8,__PT_INT_CODE+2(%r11) # load new svc number + sla %r8,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume # sysc_notify_resume: - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_notify_resume) la %r14,BASED(sysc_return) br %r1 # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument - lm %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - b BASED(sysc_nr_ok) # restart svc - # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_return) # load adr. of system return - br %r1 # branch to do_per_trap + ni __TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_per_trap) + la %r14,BASED(sysc_return) + br %r1 # call do_per_trap # # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before # and after the system call # sysc_tracesys: - l %r1,BASED(.Ltrace_entry) - la %r2,SP_PTREGS(%r15) # load pt_regs + l %r1,BASED(.Ltrace_enter) + lr %r2,%r11 # pass pointer to pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVCNR(%r15) - st %r0,SP_R2(%r15) - basr %r14,%r1 + icm %r0,3,__PT_INT_CODE+2(%r11) + st %r0,__PT_R2(%r11) + basr %r14,%r1 # call do_syscall_trace_enter cl %r2,BASED(.Lnr_syscalls) - bnl BASED(sysc_tracenogo) - lr %r7,%r2 - sll %r7,2 # svc number *4 - l %r8,0(%r7,%r10) + jnl sysc_tracenogo + lr %r8,%r2 + sll %r8,2 + l %r9,0(%r8,%r10) sysc_tracego: - lm %r3,%r6,SP_R3(%r15) - mvc SP_ARGS(4,%r15),SP_R7(%r15) - l %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - st %r2,SP_R2(%r15) # store return value + lm %r3,%r7,__PT_R3(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + st %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_SYSCALL - bz BASED(sysc_return) + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 + jz sysc_return l %r1,BASED(.Ltrace_exit) - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs la %r14,BASED(sysc_return) - br %r1 + br %r1 # call do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + l %r12,__LC_THREAD_INFO l %r13,__LC_SVC_NEW_PSW+4 - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - bo BASED(0f) - st %r15,SP_R15(%r15) # store stack pointer for new kthread -0: l %r1,BASED(.Lschedtail) - basr %r14,%r1 + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jo 0f + st %r15,__PT_R15(%r11) # store stack pointer for new kthread +0: l %r1,BASED(.Lschedule_tail) + basr %r14,%r1 # call schedule_tail TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_tracenogo) + ssm __LC_SVC_NEW_PSW # reenable interrupts + j sysc_tracenogo # # kernel_execve function needs to deal with pt_regs that is not @@ -406,155 +346,98 @@ ENTRY(kernel_execve) stm %r12,%r15,48(%r15) lr %r14,%r15 l %r13,__LC_SVC_NEW_PSW+4 - s %r15,BASED(.Lc_spsize) + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) st %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) + la %r12,STACK_FRAME_OVERHEAD(%r15) xc 0(__PT_SIZE,%r12),0(%r12) l %r1,BASED(.Ldo_execve) lr %r5,%r12 - basr %r14,%r1 + basr %r14,%r1 # call do_execve ltr %r2,%r2 - be BASED(0f) - a %r15,BASED(.Lc_spsize) + je 0f + ahi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE) lm %r12,%r15,48(%r15) br %r14 # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts +0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts l %r15,__LC_KERNEL_STACK # load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs l %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts l %r1,BASED(.Lexecve_tail) - basr %r14,%r1 - b BASED(sysc_return) + basr %r14,%r1 # call execve_tail + j sysc_return /* * Program check handler routine */ ENTRY(pgm_check_handler) -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ stpt __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - bnz BASED(pgm_per) # got per exception -> special case - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) - mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime: - l %r3,__LC_PGM_ILC # load program interruption code - l %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - la %r8,0x7f - nr %r8,%r3 - sll %r8,2 - l %r1,BASED(.Ljump_table) - l %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to interrupt-handler -pgm_exit: - b BASED(sysc_return) - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - bnz BASED(pgm_per_std) # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(8),__LC_SVC_NEW_PSW - be BASED(pgm_svcper) -# no interesting special case, ignore PER event - lm %r12,%r15,__LC_SAVE_AREA - lpsw 0x28 - -# -# Normal per exception -# -pgm_per_std: - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime2) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime2: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_PGM_OLD_PSW + tmh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + j 2f +1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + l %r15,__LC_KERNEL_STACK +2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f l %r1,__TI_task(%r12) - tm SP_PSW+1(%r15),0x01 # kernel per event ? - bz BASED(kernel_per) - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + tmh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __TI_flags+3(%r12),_TIF_PER_TRAP mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - l %r3,__LC_PGM_ILC # load program interruption code - l %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - la %r8,0x7f - nr %r8,%r3 # clear per-event-bit and ilc - be BASED(pgm_exit2) # only per or per+check ? - sll %r8,2 +0: REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) l %r1,BASED(.Ljump_table) - l %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area + la %r10,0x7f + n %r10,__PT_INT_CODE(%r11) + je sysc_return + sll %r10,2 + l %r1,0(%r10,%r1) # load address of handler routine + lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # branch to interrupt-handler -pgm_exit2: - b BASED(sysc_return) + j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# PER event in supervisor state, must be kprobes # -pgm_svcper: - SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - l %r8,__TI_task(%r12) - mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE - mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - lm %r2,%r6,SP_R2(%r15) # load svc arguments - b BASED(sysc_do_svc) +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_per_trap) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_per_trap + j sysc_return # -# per was called from kernel, must be kprobes +# single stepped system call # -kernel_per: - REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - basr %r14,%r1 # branch to do_single_step - b BASED(pgm_exit) +pgm_svcper: + oi __TI_flags+3(%r12),_TIF_PER_TRAP + mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) + lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine @@ -563,28 +446,35 @@ kernel_per: ENTRY(io_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_SAVE_AREA+16 - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(io_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -io_no_vtime: + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_IO_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz io_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +io_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF - l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to standard irq handler + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_IRQ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: tm __TI_flags+3(%r12),_TIF_WORK_INT - bnz BASED(io_work) # there is work to do (signals etc.) + jnz io_work # there is work to do (signals etc.) io_restore: - RESTORE_ALL __LC_RETURN_PSW,0 + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + ni __LC_RETURN_PSW+1,0xfd # clean wait state bit + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW io_done: # @@ -595,28 +485,29 @@ io_done: # Before any work can be done, a switch to the kernel stack is required. # io_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bo BASED(io_work_user) # yes -> do resched & signal + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling icm %r0,15,__TI_precount(%r12) - bnz BASED(io_restore) # preemption disabled + jnz io_restore # preemption disabled tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bno BASED(io_restore) + jno io_restore # switch to kernel stack - l %r1,SP_R15(%r15) - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + l %r1,__PT_R15(%r11) + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 # TRACE_IRQS_ON already done at io_return, call # TRACE_IRQS_OFF to keep things symmetrical TRACE_IRQS_OFF - l %r1,BASED(.Lpreempt_schedule_irq) + l %r1,BASED(.Lpreempt_irq) basr %r14,%r1 # call preempt_schedule_irq - b BASED(io_return) + j io_return #else - b BASED(io_restore) + j io_restore #endif # @@ -624,9 +515,10 @@ io_work: # io_work_user: l %r1,__LC_KERNEL_STACK - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 # @@ -636,24 +528,24 @@ io_work_user: # io_work_tif: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bo BASED(io_mcck_pending) + jo io_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bo BASED(io_reschedule) + jo io_reschedule tm __TI_flags+3(%r12),_TIF_SIGPENDING - bo BASED(io_sigpending) + jo io_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - bo BASED(io_notify_resume) - b BASED(io_return) # beware of critical section cleanup + jo io_notify_resume + j io_return # beware of critical section cleanup # # _TIF_MCCK_PENDING is set, call handler # io_mcck_pending: # TRACE_IRQS_ON already done at io_return - l %r1,BASED(.Ls390_handle_mcck) + l %r1,BASED(.Lhandle_mcck) basr %r14,%r1 # TIF bit will be cleared by handler TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_NEED_RESCHED is set, call schedule @@ -661,37 +553,37 @@ io_mcck_pending: io_reschedule: # TRACE_IRQS_ON already done at io_return l %r1,BASED(.Lschedule) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts basr %r14,%r1 # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_SIGPENDING is set, call do_signal # io_sigpending: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_SIGPENDING is set, call do_signal # io_notify_resume: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_notify_resume) - basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return /* * External interrupt handler routine @@ -700,23 +592,25 @@ io_notify_resume: ENTRY(ext_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_SAVE_AREA+16 - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(ext_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -ext_no_vtime: + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_EXT_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +ext_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area + lr %r2,%r11 # pass pointer to pt_regs l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) - basr %r14,%r1 - b BASED(io_return) + basr %r14,%r1 # call do_extint + j io_return __critical_end: @@ -728,82 +622,74 @@ ENTRY(mcck_int_handler) stck __LC_MCCK_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+32 - la %r12,__LC_MCK_OLD_PSW + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? - bo BASED(mcck_int_main) # yes -> rest of mcck code invalid - mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA + jo mcck_panic # yes -> rest of mcck code invalid + la %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - bo BASED(1f) + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - bl BASED(0f) + jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - bl BASED(0f) + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - bl BASED(0f) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) +2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - bno BASED(mcck_int_main) # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - bnz BASED(mcck_int_main) # from user -> load async stack - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end) - bhe BASED(mcck_int_main) - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start) - bl BASED(mcck_int_main) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 -mcck_int_main: - l %r14,__LC_PANIC_STACK # are we already on the panic stack? - slr %r14,%r15 - sra %r14,PAGE_SHIFT - be BASED(0f) - l %r15,__LC_PANIC_STACK # load panic stack -0: s %r15,BASED(.Lc_spsize) # make room for registers & psw - CREATE_STACK_FRAME __LC_SAVE_AREA+32 - mvc SP_PSW(8,%r15),0(%r12) - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - bno BASED(mcck_no_vtime) # no -> skip cleanup critical - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(mcck_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER -mcck_no_vtime: - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Ls390_mcck) - basr %r14,%r1 # call machine check handler - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(mcck_return) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER +mcck_skip: + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT + mvc __PT_R0(64,%r11),__LC_GPREGS_SAVE_AREA + stm %r8,%r9,__PT_PSW(%r11) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_machine_check) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call s390_do_machine_check + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno mcck_return l %r1,__LC_KERNEL_STACK # switch to kernel stack - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r15) lr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bno BASED(mcck_return) + jno mcck_return TRACE_IRQS_OFF - l %r1,BASED(.Ls390_handle_mcck) - basr %r14,%r1 # call machine check handler + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # call s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW + mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? - bno BASED(0f) - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 + jno 0f + lm %r0,%r15,__PT_R0(%r11) stpt __LC_EXIT_TIMER - lpsw __LC_RETURN_MCCK_PSW # back to caller -0: lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 - lpsw __LC_RETURN_MCCK_PSW # back to caller + lpsw __LC_RETURN_MCCK_PSW +0: lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_MCCK_PSW - RESTORE_ALL __LC_RETURN_MCCK_PSW,0 +mcck_panic: + l %r14,__LC_PANIC_STACK + slr %r14,%r15 + sra %r14,PAGE_SHIFT + jz 0f + l %r15,__LC_PANIC_STACK +0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip /* * Restart interruption handler, kick starter for additional CPUs @@ -817,18 +703,18 @@ restart_base: stck __LC_LAST_UPDATE_CLOCK mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - l %r15,__LC_SAVE_AREA+60 # load ksp + l %r15,__LC_GPREGS_SAVE_AREA+60 # load ksp lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs lam %a0,%a15,__LC_AREGS_SAVE_AREA - lm %r6,%r15,__SF_GPRS(%r15) # load registers from clone + lm %r6,%r15,__SF_GPRS(%r15)# load registers from clone l %r1,__LC_THREAD_INFO mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off basr %r14,0 l %r14,restart_addr-.(%r14) - basr %r14,%r14 # branch to start_secondary + basr %r14,%r14 # call start_secondary restart_addr: .long start_secondary .align 8 @@ -853,19 +739,19 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - st %r15,__LC_SAVE_AREA_64(%r0) # save r15 + st %r15,__LC_SAVE_AREA_RESTART basr %r15,0 0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack l %r15,0(%r15) - ahi %r15,-SP_SIZE # make room for pt_regs - stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack - mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + ahi %r15,-__PT_SIZE # create pt_regs on stack + stm %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw + ahi %r15,-STACK_FRAME_OVERHEAD + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) basr %r14,0 1: l %r14,.Ldo_restart-1b(%r14) basr %r14,%r14 - basr %r14,0 # load disabled wait PSW if 2: lpsw restart_psw_crash-2b(%r14) # do_restart returns .align 4 @@ -887,213 +773,174 @@ restart_psw_crash: */ stack_overflow: l %r15,__LC_PANIC_STACK # change to panic stack - sl %r15,BASED(.Lc_spsize) - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - la %r1,__LC_SAVE_AREA - ch %r12,BASED(.L0x020) # old psw addr == __LC_SVC_OLD_PSW ? - be BASED(0f) - ch %r12,BASED(.L0x028) # old psw addr == __LC_PGM_OLD_PSW ? - be BASED(0f) - la %r1,__LC_SAVE_AREA+16 -0: mvc SP_R12(16,%r15),0(%r1) # move %r12-%r15 to stack - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear back chain - l %r1,BASED(1f) # branch to kernel_stack_overflow - la %r2,SP_PTREGS(%r15) # load pt_regs - br %r1 + ahi %r15,-__PT_SIZE # create pt_regs + stm %r0,%r7,__PT_R0(%r15) + stm %r8,%r9,__PT_PSW(%r15) + mvc __PT_R8(32,%r11),0(%r14) + lr %r15,%r11 + ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,BASED(1f) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + lr %r2,%r11 # pass pointer to pt_regs + br %r1 # branch to kernel_stack_overflow 1: .long kernel_stack_overflow #endif -cleanup_table_system_call: - .long system_call + 0x80000000, sysc_do_svc + 0x80000000 -cleanup_table_sysc_tif: - .long sysc_tif + 0x80000000, sysc_restore + 0x80000000 -cleanup_table_sysc_restore: - .long sysc_restore + 0x80000000, sysc_done + 0x80000000 -cleanup_table_io_tif: - .long io_tif + 0x80000000, io_restore + 0x80000000 -cleanup_table_io_restore: - .long io_restore + 0x80000000, io_done + 0x80000000 +cleanup_table: + .long system_call + 0x80000000 + .long sysc_do_svc + 0x80000000 + .long sysc_tif + 0x80000000 + .long sysc_restore + 0x80000000 + .long sysc_done + 0x80000000 + .long io_tif + 0x80000000 + .long io_restore + 0x80000000 + .long io_done + 0x80000000 cleanup_critical: - clc 4(4,%r12),BASED(cleanup_table_system_call) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_system_call+4) - bl BASED(cleanup_system_call) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_tif) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_tif+4) - bl BASED(cleanup_sysc_tif) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_restore) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_restore+4) - bl BASED(cleanup_sysc_restore) -0: - clc 4(4,%r12),BASED(cleanup_table_io_tif) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_tif+4) - bl BASED(cleanup_io_tif) -0: - clc 4(4,%r12),BASED(cleanup_table_io_restore) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_restore+4) - bl BASED(cleanup_io_restore) -0: - br %r14 + cl %r9,BASED(cleanup_table) # system_call + jl 0f + cl %r9,BASED(cleanup_table+4) # sysc_do_svc + jl cleanup_system_call + cl %r9,BASED(cleanup_table+8) # sysc_tif + jl 0f + cl %r9,BASED(cleanup_table+12) # sysc_restore + jl cleanup_sysc_tif + cl %r9,BASED(cleanup_table+16) # sysc_done + jl cleanup_sysc_restore + cl %r9,BASED(cleanup_table+20) # io_tif + jl 0f + cl %r9,BASED(cleanup_table+24) # io_restore + jl cleanup_io_tif + cl %r9,BASED(cleanup_table+28) # io_done + jl cleanup_io_restore +0: br %r14 cleanup_system_call: - mvc __LC_RETURN_PSW(8),0(%r12) - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4) - bh BASED(0f) - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) + # check if stpt has been executed + cl %r9,BASED(cleanup_system_call_insn) + jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: c %r12,BASED(.Lmck_old_psw) - la %r12,__LC_SAVE_AREA+32 - be BASED(0f) - la %r12,__LC_SAVE_AREA+16 -0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8) - bhe BASED(cleanup_vtime) - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn) - bh BASED(0f) - mvc __LC_SAVE_AREA(16),0(%r12) -0: st %r13,4(%r12) - l %r15,__LC_KERNEL_STACK # problem state -> load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - st %r15,12(%r12) - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - mvc 0(4,%r12),__LC_THREAD_INFO -cleanup_vtime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) - bhe BASED(cleanup_stime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+16) - bh BASED(cleanup_update) - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stm has been executed + cl %r9,BASED(cleanup_system_call_insn+4) + jh 0f + mvc __LC_SAVE_AREA_SYNC(32),0(%r11) +0: # set up saved registers r12, and r13 + st %r12,16(%r11) # r12 thread-info pointer + st %r13,20(%r11) # r13 literal-pool pointer + # check if the user time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+8) + jh 0f + l %r10,__LC_EXIT_TIMER + l %r15,__LC_EXIT_TIMER+4 + SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER + ADD64 %r10,%r15,__LC_USER_TIMER + st %r10,__LC_USER_TIMER + st %r15,__LC_USER_TIMER+4 +0: # check if the system time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+12) + jh 0f + l %r10,__LC_LAST_UPDATE_TIMER + l %r15,__LC_LAST_UPDATE_TIMER+4 + SUB64 %r10,%r15,__LC_EXIT_TIMER + ADD64 %r10,%r15,__LC_SYSTEM_TIMER + st %r10,__LC_SYSTEM_TIMER + st %r15,__LC_SYSTEM_TIMER+4 +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4) - la %r12,__LC_RETURN_PSW + # set up saved register 11 + l %r15,__LC_KERNEL_STACK + ahi %r15,-__PT_SIZE + st %r15,12(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(32,%r15),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r15) + mvc __PT_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + # setup saved register 15 + ahi %r15,-STACK_FRAME_OVERHEAD + st %r15,28(%r11) # r15 stack pointer + # set new psw address and exit + l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 br %r14 cleanup_system_call_insn: - .long sysc_saveall + 0x80000000 .long system_call + 0x80000000 - .long sysc_vtime + 0x80000000 - .long sysc_stime + 0x80000000 - .long sysc_update + 0x80000000 + .long sysc_stm + 0x80000000 + .long sysc_vtime + 0x80000000 + 36 + .long sysc_vtime + 0x80000000 + 76 cleanup_sysc_tif: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_tif) - la %r12,__LC_RETURN_PSW + l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000 br %r14 cleanup_sysc_restore: - clc 4(4,%r12),BASED(cleanup_sysc_restore_insn) - be BASED(2f) - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: clc 4(4,%r12),BASED(cleanup_sysc_restore_insn+4) - be BASED(2f) - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - la %r12,__LC_SAVE_AREA+32 - be BASED(1f) - la %r12,__LC_SAVE_AREA+16 -1: mvc 0(16,%r12),SP_R12(%r15) - lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW + cl %r9,BASED(cleanup_sysc_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_sysc_restore_insn: .long sysc_done - 4 + 0x80000000 - .long sysc_done - 8 + 0x80000000 cleanup_io_tif: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_tif) - la %r12,__LC_RETURN_PSW + l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000 br %r14 cleanup_io_restore: - clc 4(4,%r12),BASED(cleanup_io_restore_insn) - be BASED(1f) - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_io_restore_insn+4) - be BASED(1f) - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -1: la %r12,__LC_RETURN_PSW + cl %r9,BASED(cleanup_io_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + ni __LC_RETURN_PSW+1,0xfd # clear wait state bit + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 - .long io_done - 8 + 0x80000000 /* * Integer constants */ - .align 4 -.Lc_spsize: .long SP_SIZE -.Lc_overhead: .long STACK_FRAME_OVERHEAD -.Lnr_syscalls: .long NR_syscalls -.L0x018: .short 0x018 -.L0x020: .short 0x020 -.L0x028: .short 0x028 -.L0x030: .short 0x030 -.L0x038: .short 0x038 -.Lc_1: .long 1 + .align 4 +.Lnr_syscalls: .long NR_syscalls /* * Symbol constants */ -.Ls390_mcck: .long s390_do_machine_check -.Ls390_handle_mcck: - .long s390_handle_mcck -.Lmck_old_psw: .long __LC_MCK_OLD_PSW -.Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint -.Ldo_signal: .long do_signal -.Ldo_notify_resume: - .long do_notify_resume -.Lhandle_per: .long do_per_trap -.Ldo_execve: .long do_execve -.Lexecve_tail: .long execve_tail -.Ljump_table: .long pgm_check_table -.Lschedule: .long schedule +.Ldo_machine_check: .long s390_do_machine_check +.Lhandle_mcck: .long s390_handle_mcck +.Ldo_IRQ: .long do_IRQ +.Ldo_extint: .long do_extint +.Ldo_signal: .long do_signal +.Ldo_notify_resume: .long do_notify_resume +.Ldo_per_trap: .long do_per_trap +.Ldo_execve: .long do_execve +.Lexecve_tail: .long execve_tail +.Ljump_table: .long pgm_check_table +.Lschedule: .long schedule #ifdef CONFIG_PREEMPT -.Lpreempt_schedule_irq: - .long preempt_schedule_irq +.Lpreempt_irq: .long preempt_schedule_irq #endif -.Ltrace_entry: .long do_syscall_trace_enter -.Ltrace_exit: .long do_syscall_trace_exit -.Lschedtail: .long schedule_tail -.Lsysc_table: .long sys_call_table +.Ltrace_enter: .long do_syscall_trace_enter +.Ltrace_exit: .long do_syscall_trace_exit +.Lschedule_tail: .long schedule_tail +.Lsys_call_table: .long sys_call_table +.Lsysc_per: .long sysc_per + 0x80000000 #ifdef CONFIG_TRACE_IRQFLAGS -.Ltrace_irq_on_caller: - .long trace_hardirqs_on_caller -.Ltrace_irq_off_caller: - .long trace_hardirqs_off_caller +.Lhardirqs_on: .long trace_hardirqs_on_caller +.Lhardirqs_off: .long trace_hardirqs_off_caller #endif #ifdef CONFIG_LOCKDEP -.Llockdep_sys_exit: - .long lockdep_sys_exit +.Llockdep_sys_exit: .long lockdep_sys_exit #endif -.Lcritical_start: - .long __critical_start + 0x80000000 -.Lcritical_end: - .long __critical_end + 0x80000000 -.Lcleanup_critical: - .long cleanup_critical +.Lcritical_start: .long __critical_start + 0x80000000 +.Lcritical_length: .long __critical_end - __critical_start .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esa diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 66729eb7bbc..bf538aaf407 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,11 +5,16 @@ #include <linux/signal.h> #include <asm/ptrace.h> -void do_protection_exception(struct pt_regs *, long, unsigned long); -void do_dat_exception(struct pt_regs *, long, unsigned long); -void do_asce_exception(struct pt_regs *, long, unsigned long); -extern int sysctl_userprocess_debug; +extern void (*pgm_check_table[128])(struct pt_regs *); +extern void *restart_stack; + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + +void do_protection_exception(struct pt_regs *regs); +void do_dat_exception(struct pt_regs *regs); +void do_asce_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); @@ -17,11 +22,15 @@ void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +void do_restart(void); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); -void die(const char * str, struct pt_regs * regs, long err); +void die(struct pt_regs *regs, const char *str); + +void __init time_init(void); struct s390_mmap_arg_struct; struct fadvise64_64_args; diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 713da076053..412a7b8783d 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -19,195 +19,133 @@ #include <asm/unistd.h> #include <asm/page.h> -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 64 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 72 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 80 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 88 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 96 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 8 +__PT_R2 = __PT_GPRS + 16 +__PT_R3 = __PT_GPRS + 24 +__PT_R4 = __PT_GPRS + 32 +__PT_R5 = __PT_GPRS + 40 +__PT_R6 = __PT_GPRS + 48 +__PT_R7 = __PT_GPRS + 56 +__PT_R8 = __PT_GPRS + 64 +__PT_R9 = __PT_GPRS + 72 +__PT_R10 = __PT_GPRS + 80 +__PT_R11 = __PT_GPRS + 88 +__PT_R12 = __PT_GPRS + 96 +__PT_R13 = __PT_GPRS + 104 +__PT_R14 = __PT_GPRS + 112 +__PT_R15 = __PT_GPRS + 120 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) - .macro SPP newpp -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP - jz .+8 - .insn s,0xb2800000,\newpp -#endif - .endm - - .macro HANDLE_SIE_INTERCEPT -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - tm __TI_flags+6(%r12),_TIF_SIE>>8 - jz 0f - SPP __LC_CMF_HPP # set host id - clc SP_PSW+8(8,%r15),BASED(.Lsie_loop) - jl 0f - clc SP_PSW+8(8,%r15),BASED(.Lsie_done) - jhe 0f - mvc SP_PSW+8(8,%r15),BASED(.Lsie_loop) -0: -#endif - .endm - -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 brasl %r14,trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 brasl %r14,trace_hardirqs_off_caller - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 brasl %r14,lockdep_sys_exit -0: - .endm -#else -#define LOCKDEP_SYS_EXIT #endif - - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lg %r10,\lc_from - slg %r10,\lc_to - alg %r10,\lc_sum - stg %r10,\lc_sum .endm -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ + .macro SPP newpp +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP + jz .+8 + .insn s,0xb2800000,\newpp +#endif + .endm - .macro SAVE_ALL_SVC psworg,savearea - stmg %r11,%r15,\savearea - lg %r15,__LC_KERNEL_STACK # problem state -> load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - lg %r11,__LC_LAST_BREAK + .macro HANDLE_SIE_INTERCEPT scratch +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + tm __TI_flags+6(%r12),_TIF_SIE>>8 + jz .+42 + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP + jz .+8 + .insn s,0xb2800000,BASED(.Lhost_id) # set host id + lgr \scratch,%r9 + slg \scratch,BASED(.Lsie_loop) + clg \scratch,BASED(.Lsie_length) + jhe .+10 + lg %r9,BASED(.Lsie_loop) +#endif .endm - .macro SAVE_ALL_PGM psworg,savearea - stmg %r11,%r15,\savearea - tm \psworg+1,0x01 # test problem state bit + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - jnz 1f - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jnz 2f - la %r12,\psworg - j stack_overflow -#else - jz 2f + tml %r15,\stacksize - CONFIG_STACK_GUARD + lghi %r14,\savearea + jz stack_overflow #endif -1: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp -2: aghi %r15,-SP_SIZE # make room for registers & psw - larl %r13,system_call - lg %r11,__LC_LAST_BREAK .endm - .macro SAVE_ALL_ASYNC psworg,savearea - stmg %r11,%r15,\savearea - larl %r13,system_call - lg %r11,__LC_LAST_BREAK - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - jnz 1f # from user -> load kernel stack - clc \psworg+8(8),BASED(.Lcritical_end) + .macro SWITCH_ASYNC savearea,stack,shift + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f + lgr %r14,%r9 + slg %r14,BASED(.Lcritical_start) + clg %r14,BASED(.Lcritical_length) jhe 0f - clc \psworg+8(8),BASED(.Lcritical_start) - jl 0f + lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical - tm 1(%r12),0x01 # retest problem state after cleanup + tmhh %r8,0x0001 # retest problem state after cleanup jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the async. stack ? +0: lg %r14,\stack # are we already on the target stack? slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT -#ifdef CONFIG_CHECK_STACK + srag %r14,%r14,\shift jnz 1f - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jnz 2f - j stack_overflow -#else - jz 2f -#endif -1: lg %r15,__LC_ASYNC_STACK # load async stack -2: aghi %r15,-SP_SIZE # make room for registers & psw - .endm - - .macro CREATE_STACK_FRAME savearea - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - mvc SP_R11(40,%r15),\savearea # move %r11-%r15 to stack - stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack + CHECK_STACK 1<<\shift,\savearea + j 2f +1: lg %r15,\stack # load target stack +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(16),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user - stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER - lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user - lpswe \psworg # back to caller + .macro UPDATE_VTIME scratch,enter_timer + lg \scratch,__LC_EXIT_TIMER + slg \scratch,\enter_timer + alg \scratch,__LC_USER_TIMER + stg \scratch,__LC_USER_TIMER + lg \scratch,__LC_LAST_UPDATE_TIMER + slg \scratch,__LC_EXIT_TIMER + alg \scratch,__LC_SYSTEM_TIMER + stg \scratch,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm - .macro LAST_BREAK - srag %r10,%r11,23 - jz 0f - stg %r11,__TI_last_break(%r12) -0: + .macro LAST_BREAK scratch + srag \scratch,%r10,23 + jz .+10 + stg %r10,__TI_last_break(%r12) .endm .macro REENABLE_IRQS - mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) - ni __SF_EMPTY(%r15),0xbf - ssm __SF_EMPTY(%r15) + stg %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm .section .kprobes.text, "ax" @@ -246,64 +184,72 @@ __critical_start: ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct +sysc_stmg: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call +sysc_per: + lg %r15,__LC_KERNEL_STACK + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK + UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r13 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: - llgh %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 # shift and test for svc 0 + oi __TI_flags+7(%r12),_TIF_SYSCALL + llgh %r8,__PT_INT_CODE+2(%r11) + slag %r8,%r8,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 - llgfr %r1,%r1 # clear high word in r1 + llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVCNR(%r15) - slag %r7,%r1,2 # shift and test for svc 0 + sth %r1,__PT_INT_CODE+2(%r11) + slag %r8,%r1,2 sysc_nr_ok: - larl %r10,sys_call_table + larl %r10,sys_call_table # 64 bit system call table #ifdef CONFIG_COMPAT - tm __TI_flags+5(%r12),(_TIF_31BIT>>16) # running in 31 bit mode ? + tm __TI_flags+5(%r12),(_TIF_31BIT>>16) jno sysc_noemu - larl %r10,sys_call_table_emu # use 31 bit emulation system calls + larl %r10,sys_call_table_emu # 31 bit system call table sysc_noemu: #endif - tm __TI_flags+6(%r12),_TIF_SYSCALL - mvc SP_ARGS(8,%r15),SP_R7(%r15) - lgf %r8,0(%r7,%r10) # load address of system call routine + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stg %r2,__PT_ORIG_GPR2(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lgf %r9,0(%r8,%r10) # get system call add. + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 jnz sysc_tracesys - basr %r14,%r8 # call sys_xxxx - stg %r2,SP_R2(%r15) # store return value (change R2 on stack) + basr %r14,%r9 # call sys_xxxx + stg %r2,__PT_R2(%r11) # store return value sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+7(%r12),_TIF_WORK_SVC - jnz sysc_work # there is work to do (signals etc.) + jnz sysc_work # check for work + ni __TI_flags+7(%r12),255-_TIF_SYSCALL sysc_restore: - RESTORE_ALL __LC_RETURN_PSW,1 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW sysc_done: # -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore - -# # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED @@ -312,8 +258,6 @@ sysc_work_tif: jo sysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart tm __TI_flags+7(%r12),_TIF_PER_TRAP jo sysc_singlestep j sysc_return # beware of critical section cleanup @@ -323,7 +267,7 @@ sysc_work_tif: # sysc_reschedule: larl %r14,sysc_return - jg schedule # return point is sysc_return + jg schedule # # _TIF_MCCK_PENDING is set, call handler @@ -337,42 +281,33 @@ sysc_mcck_pending: # sysc_sigpending: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep - j sysc_return + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + tm __TI_flags+7(%r12),_TIF_SYSCALL + jno sysc_return + lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lghi %r8,0 # svc 0 returns -ENOSYS + lh %r1,__PT_INT_CODE+2(%r11) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r8,%r1,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume # sysc_notify_resume: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs larl %r14,sysc_return - jg do_notify_resume # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 - j sysc_nr_ok # restart svc + jg do_notify_resume # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return # load adr. of system return + ni __TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return jg do_per_trap # @@ -380,41 +315,41 @@ sysc_singlestep: # and after the system call # sysc_tracesys: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs la %r3,0 - llgh %r0,SP_SVCNR(%r15) - stg %r0,SP_R2(%r15) + llgh %r0,__PT_INT_CODE+2(%r11) + stg %r0,__PT_R2(%r11) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls clgr %r0,%r2 jnh sysc_tracenogo - sllg %r7,%r2,2 # svc number *4 - lgf %r8,0(%r7,%r10) + sllg %r8,%r2,2 + lgf %r9,0(%r8,%r10) sysc_tracego: - lmg %r3,%r6,SP_R3(%r15) - mvc SP_ARGS(8,%r15),SP_R7(%r15) - lg %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - stg %r2,SP_R2(%r15) # store return value + lmg %r3,%r7,__PT_R3(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lg %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + stg %r2,__PT_R2(%r11) # store return value sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 jz sysc_return - la %r2,SP_PTREGS(%r15) # load pt_regs - larl %r14,sysc_return # return point is sysc_return + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return jg do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # ENTRY(ret_from_fork) - lg %r13,__LC_SVC_NEW_PSW+8 - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? + la %r11,STACK_FRAME_OVERHEAD(%r15) + lg %r12,__LC_THREAD_INFO + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? jo 0f - stg %r15,SP_R15(%r15) # store stack pointer for new kthread + stg %r15,__PT_R15(%r11) # store stack pointer for new kthread 0: brasl %r14,schedule_tail TRACE_IRQS_ON - stosm 24(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts j sysc_tracenogo # @@ -424,26 +359,26 @@ ENTRY(ret_from_fork) ENTRY(kernel_execve) stmg %r12,%r15,96(%r15) lgr %r14,%r15 - aghi %r15,-SP_SIZE + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) stg %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) + la %r12,STACK_FRAME_OVERHEAD(%r15) xc 0(__PT_SIZE,%r12),0(%r12) lgr %r5,%r12 brasl %r14,do_execve ltgfr %r2,%r2 je 0f - aghi %r15,SP_SIZE + aghi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE) lmg %r12,%r15,96(%r15) br %r14 # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts +0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts lg %r15,__LC_KERNEL_STACK # load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - lg %r13,__LC_SVC_NEW_PSW+8 - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs lg %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts brasl %r14,execve_tail j sysc_return @@ -452,129 +387,72 @@ ENTRY(kernel_execve) */ ENTRY(pgm_check_handler) -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ stpt __LC_SYNC_ENTER_TIMER - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - jnz pgm_per # got per exception -> special case - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) - mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK -pgm_no_vtime: - stg %r11,SP_ARGS(%r15) - lgf %r3,__LC_PGM_ILC # load program interruption code - lg %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - lghi %r8,0x7f - ngr %r8,%r3 - sll %r8,3 - larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to interrupt-handler -pgm_exit: - j sysc_return - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - jnz pgm_per_std # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW - je pgm_svcper -# no interesting special case, ignore PER event - lpswe __LC_PGM_OLD_PSW - -# -# Normal per exception -# -pgm_per_std: - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime2 - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK -pgm_no_vtime2: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_PGM_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + tmhh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + j 2f +1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r14 + lg %r15,__LC_KERNEL_STACK +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE + stg %r10,__PT_ARGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f lg %r1,__TI_task(%r12) - tm SP_PSW+1(%r15),0x01 # kernel per event ? - jz kernel_per - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + tmhh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __TI_flags+7(%r12),_TIF_PER_TRAP mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - lgf %r3,__LC_PGM_ILC # load program interruption code - lg %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - lghi %r8,0x7f - ngr %r8,%r3 # clear per-event-bit and ilc - je pgm_exit2 - sll %r8,3 +0: REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area + llgh %r10,__PT_INT_CODE+2(%r11) + nill %r10,0x007f + sll %r10,3 + je sysc_return + lg %r1,0(%r10,%r1) # load address of handler routine + lgr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # branch to interrupt-handler -pgm_exit2: j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# PER event in supervisor state, must be kprobes # -pgm_svcper: - SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK - lg %r8,__TI_task(%r12) - mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE - mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - j sysc_do_svc +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_per_trap + j sysc_return # -# per was called from kernel, must be kprobes +# single stepped system call # -kernel_per: - REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_per_trap - j pgm_exit +pgm_svcper: + oi __TI_flags+7(%r12),_TIF_PER_TRAP + mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + larl %r14,sysc_per + stg %r14,__LC_RETURN_PSW+8 + lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine @@ -582,21 +460,25 @@ kernel_per: ENTRY(io_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+40 - CREATE_STACK_FRAME __LC_SAVE_AREA+40 - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz io_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER - LAST_BREAK -io_no_vtime: + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_IO_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user? + jz io_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +io_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_IRQ # call standard irq handler + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_IRQ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -604,7 +486,14 @@ io_tif: tm __TI_flags+7(%r12),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) io_restore: - RESTORE_ALL __LC_RETURN_PSW,0 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + ni __LC_RETURN_PSW+1,0xfd # clear wait state bit + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW io_done: # @@ -617,7 +506,7 @@ io_done: # Before any work can be done, a switch to the kernel stack is required. # io_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling @@ -626,10 +515,11 @@ io_work: tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jno io_restore # switch to kernel stack - lg %r1,SP_R15(%r15) - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lg %r1,__PT_R15(%r11) + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 # TRACE_IRQS_ON already done at io_return, call # TRACE_IRQS_OFF to keep things symmetrical @@ -645,9 +535,10 @@ io_work: # io_work_user: lg %r1,__LC_KERNEL_STACK - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 # @@ -680,9 +571,9 @@ io_mcck_pending: # io_reschedule: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts brasl %r14,schedule # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -691,10 +582,10 @@ io_reschedule: # io_sigpending: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -703,10 +594,10 @@ io_sigpending: # io_notify_resume: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_notify_resume # call do_notify_resume - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -716,21 +607,24 @@ io_notify_resume: ENTRY(ext_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+40 - CREATE_STACK_FRAME __LC_SAVE_AREA+40 - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz ext_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER - LAST_BREAK -ext_no_vtime: + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_EXT_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +ext_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF lghi %r1,4096 - la %r2,SP_PTREGS(%r15) # address of register-save area + lgr %r2,%r11 # pass pointer to pt_regs llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code llgf %r4,__LC_EXT_PARAMS # get external parameter lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter @@ -747,81 +641,77 @@ ENTRY(mcck_int_handler) la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs - stmg %r11,%r15,__LC_SAVE_AREA+80 + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO larl %r13,system_call - lg %r11,__LC_LAST_BREAK - la %r12,__LC_MCK_OLD_PSW + lmg %r8,%r9,__LC_MCK_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 tm __LC_MCCK_CODE,0x80 # system damage? - jo mcck_int_main # yes -> rest of mcck code invalid - la %r14,4095 - mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14) + jo mcck_panic # yes -> rest of mcck code invalid + lghi %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - jo 1f + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - jl 0f + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - jl 0f +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) +2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - jno mcck_int_main # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - jnz mcck_int_main # from user -> load kernel stack - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end) - jhe mcck_int_main - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start) - jl mcck_int_main - brasl %r14,cleanup_critical -mcck_int_main: - lg %r14,__LC_PANIC_STACK # are we already on the panic stack? - slgr %r14,%r15 - srag %r14,%r14,PAGE_SHIFT - jz 0f - lg %r15,__LC_PANIC_STACK # load panic stack -0: aghi %r15,-SP_SIZE # make room for registers & psw - CREATE_STACK_FRAME __LC_SAVE_AREA+80 - mvc SP_PSW(16,%r15),0(%r12) - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - jno mcck_no_vtime # no -> no timer update - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz mcck_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER - LAST_BREAK -mcck_no_vtime: - la %r2,SP_PTREGS(%r15) # load pt_regs +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER + LAST_BREAK %r14 +mcck_skip: + lghi %r14,__LC_GPREGS_SAVE_AREA + mvc __PT_R0(128,%r11),0(%r14) + stmg %r8,%r9,__PT_PSW(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jno mcck_return TRACE_IRQS_OFF brasl %r14,s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER -0: lpswe __LC_RETURN_MCCK_PSW # back to caller -mcck_done: + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +0: lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_MCCK_PSW + +mcck_panic: + lg %r14,__LC_PANIC_STACK + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip /* * Restart interruption handler, kick starter for additional CPUs @@ -835,17 +725,18 @@ restart_base: stck __LC_LAST_UPDATE_CLOCK mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - lg %r15,__LC_SAVE_AREA+120 # load ksp + lghi %r10,__LC_GPREGS_SAVE_AREA + lg %r15,120(%r10) # load ksp lghi %r10,__LC_CREGS_SAVE_AREA - lctlg %c0,%c15,0(%r10) # get new ctl regs + lctlg %c0,%c15,0(%r10) # get new ctl regs lghi %r10,__LC_AREGS_SAVE_AREA lam %a0,%a15,0(%r10) - lmg %r6,%r15,__SF_GPRS(%r15) # load registers from clone + lmg %r6,%r15,__SF_GPRS(%r15)# load registers from clone lg %r1,__LC_THREAD_INFO mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off brasl %r14,start_secondary .align 8 restart_vtime: @@ -869,16 +760,16 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - stg %r15,__LC_SAVE_AREA_64(%r0) # save r15 + stg %r15,__LC_SAVE_AREA_RESTART larl %r15,restart_stack # load restart stack lg %r15,0(%r15) - aghi %r15,-SP_SIZE # make room for pt_regs - stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack - mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + aghi %r15,-__PT_SIZE # create pt_regs on stack + stmg %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw + aghi %r15,-STACK_FRAME_OVERHEAD + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) brasl %r14,do_restart - larl %r14,restart_psw_crash # load disabled wait PSW if lpswe 0(%r14) # do_restart returns .align 8 @@ -894,170 +785,153 @@ restart_psw_crash: * Setup a pt_regs so that show_trace can provide a good call trace. */ stack_overflow: - lg %r15,__LC_PANIC_STACK # change to panic stack - aghi %r15,-SP_SIZE - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack - la %r1,__LC_SAVE_AREA - chi %r12,__LC_SVC_OLD_PSW - je 0f - chi %r12,__LC_PGM_OLD_PSW - je 0f - la %r1,__LC_SAVE_AREA+40 -0: mvc SP_R11(40,%r15),0(%r1) # move %r11-%r15 to stack - mvc SP_ARGS(8,%r15),__LC_LAST_BREAK - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain - la %r2,SP_PTREGS(%r15) # load pt_regs + lg %r11,__LC_PANIC_STACK # change to panic stack + aghi %r11,-__PT_SIZE # create pt_regs + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + lgr %r15,%r11 + aghi %r15,-STACK_FRAME_OVERHEAD + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow #endif -cleanup_table_system_call: - .quad system_call, sysc_do_svc -cleanup_table_sysc_tif: - .quad sysc_tif, sysc_restore -cleanup_table_sysc_restore: - .quad sysc_restore, sysc_done -cleanup_table_io_tif: - .quad io_tif, io_restore -cleanup_table_io_restore: - .quad io_restore, io_done + .align 8 +cleanup_table: + .quad system_call + .quad sysc_do_svc + .quad sysc_tif + .quad sysc_restore + .quad sysc_done + .quad io_tif + .quad io_restore + .quad io_done cleanup_critical: - clc 8(8,%r12),BASED(cleanup_table_system_call) + clg %r9,BASED(cleanup_table) # system_call jl 0f - clc 8(8,%r12),BASED(cleanup_table_system_call+8) + clg %r9,BASED(cleanup_table+8) # sysc_do_svc jl cleanup_system_call -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_tif) + clg %r9,BASED(cleanup_table+16) # sysc_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_tif+8) + clg %r9,BASED(cleanup_table+24) # sysc_restore jl cleanup_sysc_tif -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_restore) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_restore+8) + clg %r9,BASED(cleanup_table+32) # sysc_done jl cleanup_sysc_restore -0: - clc 8(8,%r12),BASED(cleanup_table_io_tif) + clg %r9,BASED(cleanup_table+40) # io_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_tif+8) + clg %r9,BASED(cleanup_table+48) # io_restore jl cleanup_io_tif -0: - clc 8(8,%r12),BASED(cleanup_table_io_restore) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_restore+8) + clg %r9,BASED(cleanup_table+56) # io_done jl cleanup_io_restore -0: - br %r14 +0: br %r14 + cleanup_system_call: - mvc __LC_RETURN_PSW(16),0(%r12) - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8) + # check if stpt has been executed + clg %r9,BASED(cleanup_system_call_insn) jh 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER - cghi %r12,__LC_MCK_OLD_PSW - je 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+80 + cghi %r11,__LC_SAVE_AREA_ASYNC je 0f - la %r12,__LC_SAVE_AREA+40 -0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) - jhe cleanup_vtime - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stmg has been executed + clg %r9,BASED(cleanup_system_call_insn+8) jh 0f - mvc __LC_SAVE_AREA(40),0(%r12) -0: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - stg %r15,32(%r12) - stg %r11,0(%r12) - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC - mvc 8(8,%r12),__LC_THREAD_INFO -cleanup_vtime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) - jhe cleanup_stime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+32) - jh cleanup_update - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + mvc __LC_SAVE_AREA_SYNC(64),0(%r11) +0: # check if base register setup + TIF bit load has been done + clg %r9,BASED(cleanup_system_call_insn+16) + jhe 0f + # set up saved registers r10 and r12 + stg %r10,16(%r11) # r10 last break + stg %r12,32(%r11) # r12 thread-info pointer +0: # check if the user time update has been done + clg %r9,BASED(cleanup_system_call_insn+24) + jh 0f + lg %r15,__LC_EXIT_TIMER + slg %r15,__LC_SYNC_ENTER_TIMER + alg %r15,__LC_USER_TIMER + stg %r15,__LC_USER_TIMER +0: # check if the system time update has been done + clg %r9,BASED(cleanup_system_call_insn+32) + jh 0f + lg %r15,__LC_LAST_UPDATE_TIMER + slg %r15,__LC_EXIT_TIMER + alg %r15,__LC_SYSTEM_TIMER + stg %r15,__LC_SYSTEM_TIMER +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - srag %r12,%r11,23 - lg %r12,__LC_THREAD_INFO + # do LAST_BREAK + lg %r9,16(%r11) + srag %r9,%r9,23 jz 0f - stg %r11,__TI_last_break(%r12) -0: mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) - la %r12,__LC_RETURN_PSW + mvc __TI_last_break(8,%r12),16(%r11) +0: # set up saved register r11 + lg %r15,__LC_KERNEL_STACK + aghi %r15,-__PT_SIZE + stg %r15,24(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(64,%r15),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r15) + mvc __PT_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + # setup saved register r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r15,56(%r11) # r15 stack pointer + # set new psw address and exit + larl %r9,sysc_do_svc br %r14 cleanup_system_call_insn: - .quad sysc_saveall .quad system_call - .quad sysc_vtime - .quad sysc_stime - .quad sysc_update + .quad sysc_stmg + .quad sysc_per + .quad sysc_vtime+18 + .quad sysc_vtime+42 cleanup_sysc_tif: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_tif) - la %r12,__LC_RETURN_PSW + larl %r9,sysc_tif br %r14 cleanup_sysc_restore: - clc 8(8,%r12),BASED(cleanup_sysc_restore_insn) - je 2f - clc 8(8,%r12),BASED(cleanup_sysc_restore_insn+8) - jhe 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - cghi %r12,__LC_MCK_OLD_PSW + clg %r9,BASED(cleanup_sysc_restore_insn) je 0f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+80 - je 1f - la %r12,__LC_SAVE_AREA+40 -1: mvc 0(40,%r12),SP_R11(%r15) - lmg %r0,%r10,SP_R0(%r15) - lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW + lg %r9,24(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_sysc_restore_insn: .quad sysc_done - 4 - .quad sysc_done - 16 cleanup_io_tif: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_tif) - la %r12,__LC_RETURN_PSW + larl %r9,io_tif br %r14 cleanup_io_restore: - clc 8(8,%r12),BASED(cleanup_io_restore_insn) - je 1f - clc 8(8,%r12),BASED(cleanup_io_restore_insn+8) - jhe 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER -0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - mvc __LC_SAVE_AREA+80(40),SP_R11(%r15) - lmg %r0,%r10,SP_R0(%r15) - lg %r15,SP_R15(%r15) -1: la %r12,__LC_RETURN_PSW + clg %r9,BASED(cleanup_io_restore_insn) + je 0f + lg %r9,24(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + ni __LC_RETURN_PSW+1,0xfd # clear wait state bit + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .quad io_done - 4 - .quad io_done - 16 /* * Integer constants */ - .align 4 + .align 8 .Lcritical_start: - .quad __critical_start -.Lcritical_end: - .quad __critical_end + .quad __critical_start +.Lcritical_length: + .quad __critical_end - __critical_start + #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) /* @@ -1069,6 +943,7 @@ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area + xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 lmg %r0,%r13,0(%r3) # load guest gprs 0-13 lg %r14,__LC_THREAD_INFO # pointer thread_info struct oi __TI_flags+6(%r14),_TIF_SIE>>8 @@ -1085,7 +960,7 @@ sie_gmap: SPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) sie_done: - SPP __LC_CMF_HPP # set host id + SPP __SF_EMPTY+16(%r15) # set host id lg %r14,__LC_THREAD_INFO # pointer thread_info struct sie_exit: lctlg %c1,%c1,__LC_USER_ASCE # load primary asce @@ -1096,6 +971,7 @@ sie_exit: lghi %r2,0 br %r14 sie_fault: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lg %r14,__LC_THREAD_INFO # pointer thread_info struct ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area @@ -1107,8 +983,10 @@ sie_fault: .align 8 .Lsie_loop: .quad sie_loop -.Lsie_done: - .quad sie_done +.Lsie_length: + .quad sie_done - sie_loop +.Lhost_id: + .quad 0 .section __ex_table,"a" .quad sie_loop,sie_fault diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 2d781bab37b..c27a0727f93 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -329,8 +329,8 @@ iplstart: # # reset files in VM reader # - stidp __LC_SAVE_AREA # store cpuid - tm __LC_SAVE_AREA,0xff # running VM ? + stidp __LC_SAVE_AREA_SYNC # store cpuid + tm __LC_SAVE_AREA_SYNC,0xff# running VM ? bno .Lnoreset la %r2,.Lreset lhi %r3,26 @@ -449,10 +449,28 @@ ENTRY(start) # .org 0x10000 ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 + xc 0xe00(256),0xe00 stck __LC_LAST_UPDATE_CLOCK spt 5f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) @@ -534,6 +552,8 @@ ENTRY(startup) .align 8 5: .long 0x7fffffff,0xffffffff +#include "head_kdump.S" + # # params at 10400 (setup.h) # @@ -541,6 +561,8 @@ ENTRY(startup) .long 0,0 # IPL_DEVICE .long 0,0 # INITRD_START .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index f21954b44dc..d3f1ab7d90a 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -92,7 +92,7 @@ ENTRY(_stext) .LPG3: # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index ae5d492b069..99348c0eaa4 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -90,7 +90,7 @@ ENTRY(_stext) .LPG3: # check control registers stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal + oi 6(%r15),0x60 # enable sigp emergency & external call oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 00000000000..e1ac3893e97 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,119 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: + mvc 0(8,%r0),.Lrestart_psw-0b(%r13) # Setup restart PSW + mvc 464(16,%r0),.Lpgm_psw-0b(%r13) # Setup pgm check PSW + lhi %r1,1 # Start new kernel + diag %r1,%r1,0x308 # with diag 308 + +.Lno_diag308: # No diag 308 + sam31 # Switch to 31 bit addr mode + sr %r1,%r1 # Erase register r1 + sr %r2,%r2 # Erase register r2 + sigp %r1,%r2,0x12 # Switch to 31 bit arch mode + lpsw 0 # Start new kernel... +.align 8 +.Lrestart_psw: + .long 0x00080000,0x80000000 + startup +.Lpgm_psw: + .quad 0x0000000180000000,0x0000000000000000 + .Lno_diag308 +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 48c71020636..affa8e68124 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -16,6 +16,7 @@ #include <linux/ctype.h> #include <linux/fs.h> #include <linux/gfp.h> +#include <linux/crash_dump.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> @@ -26,6 +27,7 @@ #include <asm/sclp.h> #include <asm/sigp.h> #include <asm/checksum.h> +#include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -275,8 +277,8 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); /* VM IPL PARM routines */ -size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { int i; size_t len; @@ -338,8 +340,8 @@ static size_t scpdata_length(const char* buf, size_t count) return count; } -size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { size_t count; size_t i; @@ -1738,7 +1740,11 @@ static struct kobj_attribute on_restart_attr = void do_restart(void) { + smp_restart_with_online_cpu(); smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif on_restart_trigger.action->fn(&on_restart_trigger); stop_run(&on_restart_trigger); } @@ -2009,7 +2015,7 @@ static void do_reset_calls(void) u32 dump_prefix_page; -void s390_reset_system(void) +void s390_reset_system(void (*func)(void *), void *data) { struct _lowcore *lc; @@ -2028,15 +2034,19 @@ void s390_reset_system(void) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + /* Store status at absolute zero */ + store_status(); + do_reset_calls(); + if (func) + func(data); } - diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1f4050d45f7..b9a7fdd9c81 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -33,7 +33,8 @@ static const struct irq_class intrclass_names[] = { {.name = "EXT" }, {.name = "I/O" }, {.name = "CLK", .desc = "[EXT] Clock Comparator" }, - {.name = "IPI", .desc = "[EXT] Signal Processor" }, + {.name = "EXC", .desc = "[EXT] External Call" }, + {.name = "EMS", .desc = "[EXT] Emergency Signal" }, {.name = "TMR", .desc = "[EXT] CPU Timer" }, {.name = "TAL", .desc = "[EXT] Timing Alert" }, {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, @@ -42,8 +43,8 @@ static const struct irq_class intrclass_names[] = { {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, {.name = "CPM", .desc = "[EXT] CPU Measurement" }, + {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, - {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, {.name = "C15", .desc = "[I/O] 3215" }, {.name = "C70", .desc = "[I/O] 3270" }, @@ -53,6 +54,7 @@ static const struct irq_class intrclass_names[] = { {.name = "CLW", .desc = "[I/O] CLAW" }, {.name = "CTC", .desc = "[I/O] CTC" }, {.name = "APB", .desc = "[I/O] AP Bus" }, + {.name = "CSC", .desc = "[I/O] CHSC Subchannel" }, {.name = "NMI", .desc = "[NMI] Machine Check" }, }; diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index 44cc06bedf7..b987ab2c154 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -18,26 +18,15 @@ struct insn { } __packed; struct insn_args { - unsigned long *target; - struct insn *insn; - ssize_t size; + struct jump_entry *entry; + enum jump_label_type type; }; -static int __arch_jump_label_transform(void *data) +static void __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { - struct insn_args *args = data; - int rc; - - rc = probe_kernel_write(args->target, args->insn, args->size); - WARN_ON_ONCE(rc < 0); - return 0; -} - -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) -{ - struct insn_args args; struct insn insn; + int rc; if (type == JUMP_LABEL_ENABLE) { /* brcl 15,offset */ @@ -49,11 +38,33 @@ void arch_jump_label_transform(struct jump_entry *entry, insn.offset = 0; } - args.target = (void *) entry->code; - args.insn = &insn; - args.size = JUMP_LABEL_NOP_SIZE; + rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); + WARN_ON_ONCE(rc < 0); +} - stop_machine(__arch_jump_label_transform, &args, NULL); +static int __sm_arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + + __jump_label_transform(args->entry, args->type); + return 0; +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + + args.entry = entry; + args.type = type; + + stop_machine(__sm_arch_jump_label_transform, &args, NULL); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __jump_label_transform(entry, type); } #endif diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1d05d669107..64b761aef00 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -635,7 +635,7 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -void __kprobes jprobe_return_end(void) +static void __used __kprobes jprobe_return_end(void) { asm volatile("bcr 0,0"); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b09b9c62573..47b168fb29c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,10 +1,11 @@ /* * arch/s390/kernel/machine_kexec.c * - * Copyright IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005,2011 * * Author(s): Rolf Adelsberger, * Heiko Carstens <heiko.carstens@de.ibm.com> + * Michael Holzheu <holzheu@linux.vnet.ibm.com> */ #include <linux/device.h> @@ -21,12 +22,162 @@ #include <asm/smp.h> #include <asm/reset.h> #include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/asm-offsets.h> typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; +#ifdef CONFIG_CRASH_DUMP + +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Store status of next available physical CPU + */ +static int store_status_next(int start_cpu, int this_cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + int cpu, rc; + + for (cpu = start_cpu; cpu < 65536; cpu++) { + if (cpu == this_cpu) + continue; + do { + rc = raw_sigp(cpu, sigp_stop_and_store_status); + } while (rc == sigp_busy); + if (rc != sigp_order_code_accepted) + continue; + if (sa->pref_reg) + return cpu; + } + return -1; +} + +/* + * Initialize CPU ELF notes + */ +void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu, phys_cpu = 0, first = 1; + + this_cpu = stap(); + + if (!S390_lowcore.prefixreg_save_area) + first = 0; + for_each_online_cpu(cpu) { + if (first) { + add_elf_notes(cpu); + first = 0; + continue; + } + phys_cpu = store_status_next(phys_cpu, this_cpu); + if (phys_cpu == -1) + break; + add_elf_notes(cpu); + phys_cpu++; + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +#endif + +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + setup_regs(); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else + vmem_remove_mapping(crashk_res.start, size); +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; @@ -35,6 +186,9 @@ int machine_kexec_prepare(struct kimage *image) if (ipl_flags & IPL_NSS_VALID) return -ENOSYS; + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); + /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; @@ -51,27 +205,54 @@ void machine_kexec_cleanup(struct kimage *image) { } +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + void machine_shutdown(void) { } -static void __machine_kexec(void *data) +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) { relocate_kernel_t data_mover; struct kimage *image = data; - pfault_fini(); - s390_reset_system(); - data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); - for (;;); } +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + pfault_fini(); + if (image->type == KEXEC_TYPE_CRASH) + s390_reset_system(__do_machine_kdump, data); + else + s390_reset_system(__do_machine_kexec, data); + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ void machine_kexec(struct kimage *image) { + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; tracer_disable(); smp_send_stop(); smp_switch_to_ipl_cpu(__machine_kexec, image); diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 0fbe4e32f7b..22d502e885e 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -62,3 +62,84 @@ void detect_memory_layout(struct mem_chunk chunk[]) arch_local_irq_restore(flags); } EXPORT_SYMBOL(detect_memory_layout); + +/* + * Move memory chunks array from index "from" to index "to" + */ +static void mem_chunk_move(struct mem_chunk chunk[], int to, int from) +{ + int cnt = MEMORY_CHUNKS - to; + + memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk)); +} + +/* + * Initialize memory chunk + */ +static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr, + unsigned long size, int type) +{ + chunk->type = type; + chunk->addr = addr; + chunk->size = size; +} + +/* + * Create memory hole with given address, size, and type + */ +void create_mem_hole(struct mem_chunk chunk[], unsigned long addr, + unsigned long size, int type) +{ + unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size; + int i, ch_type; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunk[i].size == 0) + continue; + + /* Define chunk properties */ + ch_start = chunk[i].addr; + ch_size = chunk[i].size; + ch_end = ch_start + ch_size - 1; + ch_type = chunk[i].type; + + /* Is memory chunk hit by memory hole? */ + if (addr + size <= ch_start) + continue; /* No: memory hole in front of chunk */ + if (addr > ch_end) + continue; /* No: memory hole after chunk */ + + /* Yes: Define local hole properties */ + lh_start = max(addr, chunk[i].addr); + lh_end = min(addr + size - 1, ch_end); + lh_size = lh_end - lh_start + 1; + + if (lh_start == ch_start && lh_end == ch_end) { + /* Hole covers complete memory chunk */ + mem_chunk_init(&chunk[i], lh_start, lh_size, type); + } else if (lh_end == ch_end) { + /* Hole starts in memory chunk and convers chunk end */ + mem_chunk_move(chunk, i + 1, i); + mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size, + ch_type); + mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); + i += 1; + } else if (lh_start == ch_start) { + /* Hole ends in memory chunk */ + mem_chunk_move(chunk, i + 1, i); + mem_chunk_init(&chunk[i], lh_start, lh_size, type); + mem_chunk_init(&chunk[i + 1], lh_end + 1, + ch_size - lh_size, ch_type); + break; + } else { + /* Hole splits memory chunk */ + mem_chunk_move(chunk, i + 2, i); + mem_chunk_init(&chunk[i], ch_start, + lh_start - ch_start, ch_type); + mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); + mem_chunk_init(&chunk[i + 2], lh_end + 1, + ch_end - lh_end, ch_type); + break; + } + } +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 541a7509fae..3201ae44799 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/elfcore.h> #include <linux/smp.h> #include <linux/slab.h> #include <linux/interrupt.h> @@ -90,10 +91,12 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) default_idle(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -117,7 +120,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; regs.gprs[9] = (unsigned long) fn; regs.gprs[10] = (unsigned long) arg; diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 311e9d71288..6e0073e43f5 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -74,7 +74,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ef86ad24398..573bc29551e 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,34 +42,37 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; void update_per_regs(struct task_struct *task) { - static const struct per_regs per_single_step = { - .control = PER_EVENT_IFETCH, - .start = 0, - .end = PSW_ADDR_INSN, - }; struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - const struct per_regs *new; - struct per_regs old; - - /* TIF_SINGLE_STEP overrides the user specified PER registers. */ - new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ? - &per_single_step : &thread->per_user; + struct per_regs old, new; + + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + new.control |= PER_EVENT_IFETCH; + new.start = 0; + new.end = PSW_ADDR_INSN; + } /* Take care of the PER enablement bit in the PSW. */ - if (!(new->control & PER_EVENT_MASK)) { + if (!(new.control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; __ctl_store(old, 9, 11); - if (memcmp(new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(*new, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); } void user_enable_single_step(struct task_struct *task) @@ -166,8 +169,8 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + /* Return a clean psw mask. */ + tmp = psw_user_bits | (tmp & PSW_MASK_USER); } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -289,18 +292,10 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(psw_user32_bits, data) && -#endif - data != PSW_MASK_MERGE(psw_user_bits, data)) + ((data & ~PSW_MASK_USER) != psw_user_bits || + ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; -#ifndef CONFIG_64BIT - if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -495,21 +490,21 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) __u32 tmp; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -594,24 +589,27 @@ static int __poke_user_compat(struct task_struct *child, addr_t offset; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) + if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & PSW32_MASK_USER) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -735,7 +733,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svcnr = 0; + clear_thread_flag(TIF_SYSCALL); ret = -1; } @@ -895,8 +893,36 @@ static int s390_last_break_get(struct task_struct *target, return 0; } +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + #endif +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -921,8 +947,17 @@ static const struct user_regset s390_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_last_break_get, + .set = s390_last_break_set, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1078,6 +1113,14 @@ static int s390_compat_last_break_get(struct task_struct *target, return 0; } +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + static const struct user_regset s390_compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -1101,6 +1144,15 @@ static const struct user_regset s390_compat_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, + }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 303d961c3bb..ad67c214be0 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -10,6 +10,12 @@ #include <asm/asm-offsets.h> # +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 + +# # do_reipl_asm # Parameter: r2 = schid of reipl device # diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index e690975403f..36b32658fb2 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -17,11 +17,11 @@ # ENTRY(store_status) /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_64(%r0) + stg %r1,__LC_SAVE_AREA_RESTART lghi %r1,SAVE_AREA_BASE /* General purpose registers */ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_64(%r0) + lg %r2,__LC_SAVE_AREA_RESTART stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) /* Control registers */ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) @@ -62,8 +62,11 @@ ENTRY(store_status) larl %r2,store_status stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) br %r14 -.align 8 + + .section .bss + .align 8 .Lclkcmp: .quad 0x0000000000000000 + .previous # # do_reipl_asm diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7b371c37061..354de0763ef 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> @@ -42,6 +43,9 @@ #include <linux/reboot.h> #include <linux/topology.h> #include <linux/ftrace.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> #include <asm/ipl.h> #include <asm/uaccess.h> @@ -57,12 +61,13 @@ #include <asm/ebcdic.h> #include <asm/compat.h> #include <asm/kvm_virtio.h> +#include <asm/diag.h> -long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY); -long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); +long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | + PSW_MASK_EA | PSW_MASK_BA; +long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_ASC_HOME; /* * User copy operations. @@ -90,6 +95,15 @@ struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; int __initdata memory_end_set; unsigned long __initdata memory_end; +unsigned long VMALLOC_START; +EXPORT_SYMBOL(VMALLOC_START); + +unsigned long VMALLOC_END; +EXPORT_SYMBOL(VMALLOC_END); + +struct page *vmemmap; +EXPORT_SYMBOL(vmemmap); + /* An array with a pointer to the lowcore of every CPU. */ struct _lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); @@ -207,6 +221,8 @@ static void __init setup_zfcpdump(unsigned int console_devno) if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; + if (OLDMEM_BASE) + return; if (console_devno != -1) sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", ipl_info.data.fcp.dev_id.devno, console_devno); @@ -271,25 +287,26 @@ static int __init early_parse_mem(char *p) } early_param("mem", early_parse_mem); +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; + VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; + return 0; +} +early_param("vmalloc", parse_vmalloc); + unsigned int user_mode = HOME_SPACE_MODE; EXPORT_SYMBOL_GPL(user_mode); -static int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int set_amode_primary(void) { - psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; + psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; + psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; #ifdef CONFIG_COMPAT - psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; - psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE; + psw32_user_bits = + (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; #endif - psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY; if (MACHINE_HAS_MVCOS) { memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); @@ -325,7 +342,7 @@ early_param("user_mode", early_parse_user_mode); static void setup_addressing_mode(void) { if (user_mode == PRIMARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) + if (set_amode_primary()) pr_info("Address spaces switched, " "mvcos available\n"); else @@ -344,24 +361,25 @@ setup_lowcore(void) */ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = psw_kernel_bits; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lc->restart_psw.mask |= PSW_ASC_HOME; - lc->external_new_psw.mask = psw_kernel_bits; + lc->external_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits; + lc->program_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = psw_kernel_bits; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits; + lc->io_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; @@ -383,7 +401,6 @@ setup_lowcore(void) __ctl_set_bit(14, 29); } #else - lc->cmf_hpp = -1ULL; lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif lc->sync_enter_timer = S390_lowcore.sync_enter_timer; @@ -435,10 +452,14 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_OLDMEM || + memory_chunk[i].type == CHUNK_CRASHK) + continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: + case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -475,22 +496,19 @@ EXPORT_SYMBOL_GPL(real_memory_size); static void __init setup_memory_end(void) { - unsigned long memory_size; - unsigned long max_mem; + unsigned long vmax, vmalloc_size, tmp; int i; + #ifdef CONFIG_ZFCPDUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP) { + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) { memory_end = ZFCPDUMP_HSA_SIZE; memory_end_set = 1; } #endif - memory_size = 0; + real_memory_size = 0; memory_end &= PAGE_MASK; - max_mem = memory_end ? min(VMEM_MAX_PHYS, memory_end) : VMEM_MAX_PHYS; - memory_end = min(max_mem, memory_end); - /* * Make sure all chunks are MAX_ORDER aligned so we don't need the * extra checks that HOLES_IN_ZONE would require. @@ -510,23 +528,48 @@ static void __init setup_memory_end(void) chunk->addr = start; chunk->size = end - start; } + real_memory_size = max(real_memory_size, + chunk->addr + chunk->size); } + /* Choose kernel address space layout: 2, 3, or 4 levels. */ +#ifdef CONFIG_64BIT + vmalloc_size = VMALLOC_END ?: 128UL << 30; + tmp = (memory_end ?: real_memory_size) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; + if (tmp <= (1UL << 42)) + vmax = 1UL << 42; /* 3-level kernel page table */ + else + vmax = 1UL << 53; /* 4-level kernel page table */ +#else + vmalloc_size = VMALLOC_END ?: 96UL << 20; + vmax = 1UL << 31; /* 2-level kernel page table */ +#endif + /* vmalloc area is at the end of the kernel address space. */ + VMALLOC_END = vmax; + VMALLOC_START = vmax - vmalloc_size; + + /* Split remaining virtual space between 1:1 mapping & vmemmap array */ + tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + tmp = VMALLOC_START - tmp * sizeof(struct page); + tmp &= ~((vmax >> 11) - 1); /* align to page table level */ + tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); + vmemmap = (struct page *) tmp; + + /* Take care that memory_end is set and <= vmemmap */ + memory_end = min(memory_end ?: real_memory_size, tmp); + + /* Fixup memory chunk array to fit into 0..memory_end */ for (i = 0; i < MEMORY_CHUNKS; i++) { struct mem_chunk *chunk = &memory_chunk[i]; - real_memory_size = max(real_memory_size, - chunk->addr + chunk->size); - if (chunk->addr >= max_mem) { + if (chunk->addr >= memory_end) { memset(chunk, 0, sizeof(*chunk)); continue; } - if (chunk->addr + chunk->size > max_mem) - chunk->size = max_mem - chunk->addr; - memory_size = max(memory_size, chunk->addr + chunk->size); + if (chunk->addr + chunk->size > memory_end) + chunk->size = memory_end - chunk->addr; } - if (!memory_end) - memory_end = memory_size; } void *restart_stack __attribute__((__section__(".data"))); @@ -545,11 +588,200 @@ static void __init setup_restart_psw(void) * Setup restart PSW for absolute zero lowcore. This is necesary * if PSW restart is done on an offline CPU that has lowcore zero */ - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } +static void __init setup_vmcoreinfo(void) +{ +#ifdef CONFIG_KEXEC + unsigned long ptr = paddr_vmcoreinfo_note(); + + copy_to_absolute_zero(&S390_lowcore.vmcore_info, &ptr, sizeof(ptr)); +#endif +} + +#ifdef CONFIG_CRASH_DUMP + +/* + * Find suitable location for crashkernel memory + */ +static unsigned long __init find_crash_base(unsigned long crash_size, + char **msg) +{ + unsigned long crash_base; + struct mem_chunk *chunk; + int i; + + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return 0; + } + if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) + return OLDMEM_BASE; + + for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (chunk->type != CHUNK_READ_WRITE) + continue; + if (chunk->size < crash_size) + continue; + crash_base = (chunk->addr + chunk->size) - crash_size; + if (crash_base < crash_size) + continue; + if (crash_base < ZFCPDUMP_HSA_SIZE_MAX) + continue; + if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) + continue; + return crash_base; + } + *msg = "no suitable area found"; + return 0; +} + +/* + * Check if crash_base and crash_size is valid + */ +static int __init verify_crash_base(unsigned long crash_base, + unsigned long crash_size, + char **msg) +{ + struct mem_chunk *chunk; + int i; + + /* + * Because we do the swap to zero, we must have at least 'crash_size' + * bytes free space before crash_base + */ + if (crash_size > crash_base) { + *msg = "crashkernel offset must be greater than size"; + return -EINVAL; + } + + /* First memory chunk must be at least crash_size */ + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return -EINVAL; + } + /* Check if we fit into the respective memory chunk */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (crash_base < chunk->addr) + continue; + if (crash_base >= chunk->addr + chunk->size) + continue; + /* we have found the memory chunk */ + if (crash_base + crash_size > chunk->addr + chunk->size) { + *msg = "selected memory chunk is too small for " + "crashkernel memory"; + return -EINVAL; + } + return 0; + } + *msg = "invalid memory range specified"; + return -EINVAL; +} + +/* + * Reserve kdump memory by creating a memory hole in the mem_chunk array + */ +static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, + int type) +{ + create_mem_hole(memory_chunk, addr, size, type); +} + +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} + +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (!OLDMEM_BASE) + return; + + reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); + reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, + CHUNK_OLDMEM); + if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; + else + saved_max_pfn = PFN_DOWN(real_memory_size) - 1; +#endif +} + +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + char *msg; + int rc; + + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); + if (rc || crash_size == 0) + return; + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (register_memory_notifier(&kdump_mem_nb)) + return; + if (!crash_base) + crash_base = find_crash_base(crash_size, &msg); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (verify_crash_base(crash_base, crash_size, &msg)) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, memory_end >> 20); +#endif +} + static void __init setup_memory(void) { @@ -580,6 +812,14 @@ setup_memory(void) if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + /* Move initrd behind kdump oldmem */ + if (start + INITRD_SIZE > OLDMEM_BASE && + start < OLDMEM_BASE + OLDMEM_SIZE) + start = OLDMEM_BASE + OLDMEM_SIZE; + } +#endif if (start + INITRD_SIZE > memory_end) { pr_err("initrd extends beyond end of " "memory (0x%08lx > 0x%08lx) " @@ -610,14 +850,16 @@ setup_memory(void) for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE) + if (memory_chunk[i].type != CHUNK_READ_WRITE && + memory_chunk[i].type != CHUNK_CRASHK) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); end_chunk = min(end_chunk, end_pfn); if (start_chunk >= end_chunk) continue; - add_active_range(0, start_chunk, end_chunk); + memblock_add_node(PFN_PHYS(start_chunk), + PFN_PHYS(end_chunk - start_chunk), 0); pfn = max(start_chunk, start_pfn); for (; pfn < end_chunk; pfn++) page_set_storage_key(PFN_PHYS(pfn), @@ -644,6 +886,15 @@ setup_memory(void) reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, BOOTMEM_DEFAULT); +#ifdef CONFIG_CRASH_DUMP + if (crashk_res.start) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1, + BOOTMEM_DEFAULT); + if (is_kdump_kernel()) + reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); +#endif #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { if (INITRD_START + INITRD_SIZE <= memory_end) { @@ -812,8 +1063,11 @@ setup_arch(char **cmdline_p) setup_ipl(); setup_memory_end(); setup_addressing_mode(); + reserve_oldmem(); + reserve_crashkernel(); setup_memory(); setup_resources(); + setup_vmcoreinfo(); setup_restart_psw(); setup_lowcore(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9a40e1cc5ec..a8ba840294f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,6 +30,7 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/lowcore.h> +#include <asm/compat.h> #include "entry.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -116,7 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); + user_sregs.regs.psw.mask = psw_user_bits | + (regs->psw.mask & PSW_MASK_USER); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, @@ -143,9 +145,13 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - user_sregs.regs.psw.mask); - regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; + /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (user_sregs.regs.psw.mask & PSW_MASK_USER); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(sregs->regs.acrs)); @@ -156,7 +162,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -288,6 +294,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -295,9 +302,13 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; - regs->gprs[6] = task_thread_info(current)->last_break; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) @@ -356,6 +367,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -401,7 +413,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; @@ -421,54 +432,45 @@ void do_signal(struct pt_regs *regs) else oldset = ¤t->blocked; - /* Are we from a system call? */ - if (regs->svcnr) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; - - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; - break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; - } - regs->svcnr = 0; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current_thread_info()->system_call = + test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } - if (signr > 0) { /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->int_code >> 16); + break; + } + } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); + + if ((is_compat_task() ? + handle_signal32(signr, &ka, &info, oldset, regs) : + handle_signal(signr, &ka, &info, oldset, regs)) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -482,11 +484,30 @@ void do_signal(struct pt_regs *regs) * Let tracing know that we've done the handler setup. */ tracehook_signal_handler(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); + test_thread_flag(TIF_SINGLE_STEP)); } return; } + /* No handlers present - check for system call restart */ + clear_thread_flag(TIF_SYSCALL); + if (current_thread_info()->system_call) { + regs->int_code = current_thread_info()->system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->int_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_thread_flag(TIF_SYSCALL); + break; + } + } + /* * If there's no signal to deliver, we just put the saved sigmask back. */ @@ -494,13 +515,6 @@ void do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } - - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } } void do_notify_resume(struct pt_regs *regs) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6ab16ac64d2..2398ce6b15a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -38,6 +38,7 @@ #include <linux/timex.h> #include <linux/bootmem.h> #include <linux/slab.h> +#include <linux/crash_dump.h> #include <asm/asm-offsets.h> #include <asm/ipl.h> #include <asm/setup.h> @@ -68,9 +69,7 @@ enum s390_cpu_state { }; DEFINE_MUTEX(smp_cpu_state_mutex); -int smp_cpu_polarization[NR_CPUS]; static int smp_cpu_state[NR_CPUS]; -static int cpu_management; static DEFINE_PER_CPU(struct cpu, cpu_devices); @@ -97,6 +96,29 @@ static inline int cpu_stopped(int cpu) return raw_cpu_stopped(cpu_logical_map(cpu)); } +/* + * Ensure that PSW restart is done on an online CPU + */ +void smp_restart_with_online_cpu(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (stap() == __cpu_logical_map[cpu]) { + /* We are online: Enable DAT again and return */ + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); + return; + } + } + /* We are not online: Do PSW restart on an online CPU */ + while (sigp(cpu, sigp_restart) == sigp_busy) + cpu_relax(); + /* And stop ourself */ + while (raw_sigp(stap(), sigp_stop) == sigp_busy) + cpu_relax(); + for (;;); +} + void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) { struct _lowcore *lc, *current_lc; @@ -106,14 +128,16 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) if (smp_processor_id() == 0) func(data); - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | + PSW_MASK_EA | PSW_MASK_BA); /* Disable lowcore protection */ __ctl_clear_bit(0, 28); current_lc = lowcore_ptr[smp_processor_id()]; lc = lowcore_ptr[0]; if (!lc) lc = current_lc; - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; if (!cpu_online(0)) smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); @@ -123,29 +147,59 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) sp -= sizeof(struct pt_regs); regs = (struct pt_regs *) sp; memcpy(®s->gprs, ¤t_lc->gpregs_save_area, sizeof(regs->gprs)); - regs->psw = lc->psw_save_area; + regs->psw = current_lc->psw_save_area; sp -= STACK_FRAME_OVERHEAD; sf = (struct stack_frame *) sp; - sf->back_chain = regs->gprs[15]; + sf->back_chain = 0; smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]); } +static void smp_stop_cpu(void) +{ + while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) + cpu_relax(); +} + void smp_send_stop(void) { - int cpu, rc; + cpumask_t cpumask; + int cpu; + u64 end; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); - /* stop all processors */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - do { - rc = sigp(cpu, sigp_stop); - } while (rc == sigp_busy); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + + if (oops_in_progress) { + /* + * Give the other cpus the opportunity to complete + * outstanding interrupts before stopping them. + */ + end = get_clock() + (1000000UL << 12); + for_each_cpu(cpu, &cpumask) { + set_bit(ec_stop_cpu, (unsigned long *) + &lowcore_ptr[cpu]->ext_call_fast); + while (sigp(cpu, sigp_emergency_signal) == sigp_busy && + get_clock() < end) + cpu_relax(); + } + while (get_clock() < end) { + for_each_cpu(cpu, &cpumask) + if (cpu_stopped(cpu)) + cpumask_clear_cpu(cpu, &cpumask); + if (cpumask_empty(&cpumask)) + break; + cpu_relax(); + } + } + /* stop all processors */ + for_each_cpu(cpu, &cpumask) { + while (sigp(cpu, sigp_stop) == sigp_busy) + cpu_relax(); while (!cpu_stopped(cpu)) cpu_relax(); } @@ -161,12 +215,18 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, { unsigned long bits; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; + if ((ext_int_code & 0xffff) == 0x1202) + kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; + else + kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; /* * handle bit signal external calls */ bits = xchg(&S390_lowcore.ext_call_fast, 0); + if (test_bit(ec_stop_cpu, &bits)) + smp_stop_cpu(); + if (test_bit(ec_schedule, &bits)) scheduler_ipi(); @@ -175,6 +235,7 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); + } /* @@ -183,12 +244,19 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, */ static void smp_ext_bitcall(int cpu, int sig) { + int order; + /* * Set signaling bit in lowcore of target cpu and kick it */ set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy) + while (1) { + order = smp_vcpu_scheduled(cpu) ? + sigp_external_call : sigp_emergency_signal; + if (sigp(cpu, order) != sigp_busy) + break; udelay(10); + } } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -281,11 +349,13 @@ void smp_ctl_clear_bit(int cr, int bit) } EXPORT_SYMBOL(smp_ctl_clear_bit); -#ifdef CONFIG_ZFCPDUMP +#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) + return; + if (is_kdump_kernel()) return; if (cpu >= NR_CPUS) { pr_warning("CPU %i exceeds the maximum %i and is excluded from " @@ -331,7 +401,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) if (cpu_known(cpu_id)) continue; __cpu_logical_map[logical_cpu] = cpu_id; - smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); if (!cpu_stopped(logical_cpu)) continue; set_cpu_present(logical_cpu, true); @@ -365,7 +435,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail) if (cpu_known(cpu_id)) continue; __cpu_logical_map[logical_cpu] = cpu_id; - smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); set_cpu_present(logical_cpu, true); if (cpu >= info->configured) smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; @@ -403,6 +473,18 @@ static void __init smp_detect_cpus(void) info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE && !is_kdump_kernel()) { + struct save_area *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + panic("could not allocate memory for save area\n"); + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + 0x200, 0); + zfcpdump_save_areas[0] = save_area; + } +#endif /* Use sigp detection algorithm if sclp doesn't work. */ if (sclp_get_cpu_info(info)) { smp_use_sigp_detection = 1; @@ -463,7 +545,8 @@ int __cpuinit start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); __ctl_clear_bit(0, 28); /* Disable lowcore protection */ - S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + S390_lowcore.restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; S390_lowcore.restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; __ctl_set_bit(0, 28); /* Enable lowcore protection */ @@ -511,7 +594,8 @@ static int __cpuinit smp_alloc_lowcore(int cpu) memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); lowcore->async_stack = async_stack + ASYNC_SIZE; lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lowcore->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lowcore->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; if (user_mode != HOME_SPACE_MODE) @@ -604,7 +688,7 @@ int __cpuinit __cpu_up(unsigned int cpu) - sizeof(struct stack_frame)); memset(sf, 0, sizeof(struct stack_frame)); sf->gprs[9] = (unsigned long) sf; - cpu_lowcore->save_area[15] = (unsigned long) sf; + cpu_lowcore->gpregs_save_area[15] = (unsigned long) sf; __ctl_store(cpu_lowcore->cregs_save_area, 0, 15); atomic_inc(&init_mm.context.attach_count); asm volatile( @@ -712,6 +796,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + panic("Couldn't request external interrupt 0x1202"); /* Reallocate current lowcore, but keep its contents. */ lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -751,7 +838,7 @@ void __init smp_prepare_boot_cpu(void) S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; smp_cpu_state[0] = CPU_STATE_CONFIGURED; - smp_cpu_polarization[0] = POLARIZATION_UNKNWN; + cpu_set_polarization(0, POLARIZATION_UNKNOWN); } void __init smp_cpus_done(unsigned int max_cpus) @@ -776,8 +863,8 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t cpu_configure_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t count; @@ -787,8 +874,8 @@ static ssize_t cpu_configure_show(struct sys_device *dev, return count; } -static ssize_t cpu_configure_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int cpu = dev->id; @@ -813,7 +900,8 @@ static ssize_t cpu_configure_store(struct sys_device *dev, rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]); if (!rc) { smp_cpu_state[cpu] = CPU_STATE_STANDBY; - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); } } break; @@ -822,7 +910,8 @@ static ssize_t cpu_configure_store(struct sys_device *dev, rc = sclp_cpu_configure(__cpu_logical_map[cpu]); if (!rc) { smp_cpu_state[cpu] = CPU_STATE_CONFIGURED; - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); } } break; @@ -834,52 +923,21 @@ out: put_online_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t cpu_polarization_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - int cpu = dev->id; - ssize_t count; - - mutex_lock(&smp_cpu_state_mutex); - switch (smp_cpu_polarization[cpu]) { - case POLARIZATION_HRZ: - count = sprintf(buf, "horizontal\n"); - break; - case POLARIZATION_VL: - count = sprintf(buf, "vertical:low\n"); - break; - case POLARIZATION_VM: - count = sprintf(buf, "vertical:medium\n"); - break; - case POLARIZATION_VH: - count = sprintf(buf, "vertical:high\n"); - break; - default: - count = sprintf(buf, "unknown\n"); - break; - } - mutex_unlock(&smp_cpu_state_mutex); - return count; -} -static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL); - -static ssize_t show_cpu_address(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); } -static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL); - +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { #ifdef CONFIG_HOTPLUG_CPU - &attr_configure.attr, + &dev_attr_configure.attr, #endif - &attr_address.attr, - &attr_polarization.attr, + &dev_attr_address.attr, NULL, }; @@ -887,8 +945,8 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_capability(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_capability(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int capability; int rc; @@ -898,10 +956,10 @@ static ssize_t show_capability(struct sys_device *dev, return rc; return sprintf(buf, "%u\n", capability); } -static SYSDEV_ATTR(capability, 0444, show_capability, NULL); +static DEVICE_ATTR(capability, 0444, show_capability, NULL); -static ssize_t show_idle_count(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long idle_count; @@ -921,10 +979,10 @@ repeat: goto repeat; return sprintf(buf, "%llu\n", idle_count); } -static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); +static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); -static ssize_t show_idle_time(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long now, idle_time, idle_enter; @@ -946,12 +1004,12 @@ repeat: goto repeat; return sprintf(buf, "%llu\n", idle_time >> 12); } -static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); +static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); static struct attribute *cpu_online_attrs[] = { - &attr_capability.attr, - &attr_idle_count.attr, - &attr_idle_time_us.attr, + &dev_attr_capability.attr, + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, NULL, }; @@ -964,7 +1022,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, { unsigned int cpu = (unsigned int)(long)hcpu; struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; struct s390_idle_data *idle; int err = 0; @@ -990,7 +1048,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = { static int __devinit smp_add_present_cpu(int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; int rc; c->hotpluggable = 1; @@ -1000,11 +1058,20 @@ static int __devinit smp_add_present_cpu(int cpu) rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); if (rc) goto out_cpu; - if (!cpu_online(cpu)) - goto out; - rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); - if (!rc) - return 0; + if (cpu_online(cpu)) { + rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + if (rc) + goto out_online; + } + rc = topology_cpu_init(c); + if (rc) + goto out_topology; + return 0; + +out_topology: + if (cpu_online(cpu)) + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); +out_online: sysfs_remove_group(&s->kobj, &cpu_common_attr_group); out_cpu: #ifdef CONFIG_HOTPLUG_CPU @@ -1043,8 +1110,8 @@ out: return rc; } -static ssize_t __ref rescan_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -1053,64 +1120,19 @@ static ssize_t __ref rescan_store(struct sysdev_class *class, rc = smp_rescan_cpus(); return rc ? rc : count; } -static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store); +static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t dispatching_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - ssize_t count; - - mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", cpu_management); - mutex_unlock(&smp_cpu_state_mutex); - return count; -} - -static ssize_t dispatching_store(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, - const char *buf, - size_t count) +static int __init s390_smp_init(void) { - int val, rc; - char delim; - - if (sscanf(buf, "%d %c", &val, &delim) != 1) - return -EINVAL; - if (val != 0 && val != 1) - return -EINVAL; - rc = 0; - get_online_cpus(); - mutex_lock(&smp_cpu_state_mutex); - if (cpu_management == val) - goto out; - rc = topology_set_cpu_management(val); - if (!rc) - cpu_management = val; -out: - mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); - return rc ? rc : count; -} -static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show, - dispatching_store); - -static int __init topology_init(void) -{ - int cpu; - int rc; + int cpu, rc; register_cpu_notifier(&smp_cpu_nb); - #ifdef CONFIG_HOTPLUG_CPU - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan); + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; #endif - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching); - if (rc) - return rc; for_each_present_cpu(cpu) { rc = smp_add_present_cpu(cpu); if (rc) @@ -1118,4 +1140,4 @@ static int __init topology_init(void) } return 0; } -subsys_initcall(topology_init); +subsys_initcall(s390_smp_init); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index cf9e5c6d552..47df775c844 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,6 +7,8 @@ */ #include <linux/pfn.h> +#include <linux/suspend.h> +#include <linux/mm.h> #include <asm/system.h> /* @@ -14,6 +16,123 @@ */ extern const void __nosave_begin, __nosave_end; +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. + * The size of the keys array is (PAGE_SIZE - sizeof(long)) + */ +struct page_key_data { + struct page_key_data *next; + unsigned char data[]; +}; + +#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) + +static struct page_key_data *page_key_data; +static struct page_key_data *page_key_rp, *page_key_wp; +static unsigned long page_key_rx, page_key_wx; + +/* + * For each page in the hibernation image one additional byte is + * stored in the most significant byte of the page frame number. + * On suspend no additional memory is required but on resume the + * keys need to be memorized until the page data has been restored. + * Only then can the storage keys be set to their old state. + */ +unsigned long page_key_additional_pages(unsigned long pages) +{ + return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); +} + +/* + * Free page_key_data list of arrays. + */ +void page_key_free(void) +{ + struct page_key_data *pkd; + + while (page_key_data) { + pkd = page_key_data; + page_key_data = pkd->next; + free_page((unsigned long) pkd); + } +} + +/* + * Allocate page_key_data list of arrays with enough room to store + * one byte for each page in the hibernation image. + */ +int page_key_alloc(unsigned long pages) +{ + struct page_key_data *pk; + unsigned long size; + + size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); + while (size--) { + pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); + if (!pk) { + page_key_free(); + return -ENOMEM; + } + pk->next = page_key_data; + page_key_data = pk; + } + page_key_rp = page_key_wp = page_key_data; + page_key_rx = page_key_wx = 0; + return 0; +} + +/* + * Save the storage key into the upper 8 bits of the page frame number. + */ +void page_key_read(unsigned long *pfn) +{ + unsigned long addr; + + addr = (unsigned long) page_address(pfn_to_page(*pfn)); + *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); +} + +/* + * Extract the storage key from the upper 8 bits of the page frame number + * and store it in the page_key_data list of arrays. + */ +void page_key_memorize(unsigned long *pfn) +{ + page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; + *(unsigned char *) pfn = 0; + if (++page_key_wx < PAGE_KEY_DATA_SIZE) + return; + page_key_wp = page_key_wp->next; + page_key_wx = 0; +} + +/* + * Get the next key from the page_key_data list of arrays and set the + * storage key of the page referred by @address. If @address refers to + * a "safe" page the swsusp_arch_resume code will transfer the storage + * key from the buffer page to the original page. + */ +void page_key_write(void *address) +{ + page_set_storage_key((unsigned long) address, + page_key_rp->data[page_key_rx], 0); + if (++page_key_rx >= PAGE_KEY_DATA_SIZE) + return; + page_key_rp = page_key_rp->next; + page_key_rx = 0; +} + int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 51bcdb50a23..acb78cdee89 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -136,11 +136,14 @@ ENTRY(swsusp_arch_resume) 0: lg %r2,8(%r1) lg %r4,0(%r1) + iske %r0,%r4 lghi %r3,PAGE_SIZE lghi %r5,PAGE_SIZE 1: mvcle %r2,%r4,0 jo 1b + lg %r2,8(%r1) + sske %r0,%r2 lg %r1,16(%r1) ltgr %r1,%r1 jnz 0b diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 476081440df..78ea1948ff5 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -60,74 +60,22 @@ out: } /* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. + * sys_ipc() is the de-multiplexer for the SysV IPC calls. */ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, unsigned long, third, void __user *, ptr) { - struct ipc_kludge tmp; - int ret; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) third); - case SEMGET: - return sys_semget(first, (int)second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl(first, (int)second, third, fourth); - } - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - (size_t)second, third); - break; - case MSGRCV: - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, (struct ipc_kludge __user *) ptr, - sizeof (struct ipc_kludge))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, - (size_t)second, tmp.msgtyp, third); - case MSGGET: - return sys_msgget((key_t)first, (int)second); - case MSGCTL: - return sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, - (int)second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - break; - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget(first, (size_t)second, third); - case SHMCTL: - return sys_shmctl(first, (int)second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - - } - - return -EINVAL; + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return sys_ipc(call, first, second, third, ptr, third); } #ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 73eb08c874f..bcab2f04ba5 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -348,3 +348,5 @@ SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 5c9e439bf3f..2a94b774695 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -442,7 +442,7 @@ void s390_adjust_jiffies(void) */ FP_UNPACK_SP(SA, &fmil); if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, info->capability, 32, int); + FP_FROM_INT_S(SB, (long) info->capability, 64, long); else FP_UNPACK_SP(SB, &info->capability); FP_DIV_S(SR, SA, SB); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index dff933065ab..fa02f443f5f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -27,7 +27,7 @@ #include <linux/cpu.h> #include <linux/stop_machine.h> #include <linux/time.h> -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/smp.h> @@ -48,6 +48,7 @@ #include <asm/timer.h> #include <asm/etr.h> #include <asm/cio.h> +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) @@ -109,10 +110,14 @@ static void fixup_clock_comparator(unsigned long long delta) set_clock_comparator(S390_lowcore.clock_comparator); } -static int s390_next_event(unsigned long delta, +static int s390_next_ktime(ktime_t expires, struct clock_event_device *evt) { - S390_lowcore.clock_comparator = get_clock() + delta; + u64 nsecs; + + nsecs = ktime_to_ns(ktime_sub(expires, ktime_get_monotonic_offset())); + do_div(nsecs, 125); + S390_lowcore.clock_comparator = TOD_UNIX_EPOCH + (nsecs << 9); set_clock_comparator(S390_lowcore.clock_comparator); return 0; } @@ -137,14 +142,15 @@ void init_cpu_timer(void) cpu = smp_processor_id(); cd = &per_cpu(comparators, cpu); cd->name = "comparator"; - cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_KTIME; cd->mult = 16777; cd->shift = 12; cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; cd->cpumask = cpumask_of(cpu); - cd->set_next_event = s390_next_event; + cd->set_next_ktime = s390_next_ktime; cd->set_mode = s390_set_mode; clockevents_register_device(cd); @@ -1110,34 +1116,35 @@ out_unlock: /* * Sysfs interface functions */ -static struct sysdev_class etr_sysclass = { - .name = "etr", +static struct bus_type etr_subsys = { + .name = "etr", + .dev_name = "etr", }; -static struct sys_device etr_port0_dev = { +static struct device etr_port0_dev = { .id = 0, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; -static struct sys_device etr_port1_dev = { +static struct device etr_port1_dev = { .id = 1, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; /* - * ETR class attributes + * ETR subsys attributes */ -static ssize_t etr_stepping_port_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t etr_stepping_port_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", etr_port0.esw.p); } -static SYSDEV_CLASS_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); +static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); -static ssize_t etr_stepping_mode_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t etr_stepping_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) { char *mode_str; @@ -1151,12 +1158,12 @@ static ssize_t etr_stepping_mode_show(struct sysdev_class *class, return sprintf(buf, "%s\n", mode_str); } -static SYSDEV_CLASS_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); +static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); /* * ETR port attributes */ -static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) +static inline struct etr_aib *etr_aib_from_dev(struct device *dev) { if (dev == &etr_port0_dev) return etr_port0_online ? &etr_port0 : NULL; @@ -1164,8 +1171,8 @@ static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) return etr_port1_online ? &etr_port1 : NULL; } -static ssize_t etr_online_show(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_online_show(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int online; @@ -1174,8 +1181,8 @@ static ssize_t etr_online_show(struct sys_device *dev, return sprintf(buf, "%i\n", online); } -static ssize_t etr_online_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_online_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned int value; @@ -1212,20 +1219,20 @@ out: return count; } -static SYSDEV_ATTR(online, 0600, etr_online_show, etr_online_store); +static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); -static ssize_t etr_stepping_control_show(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_stepping_control_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? etr_eacr.e0 : etr_eacr.e1); } -static SYSDEV_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); +static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); -static ssize_t etr_mode_code_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_mode_code_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!etr_port0_online && !etr_port1_online) /* Status word is not uptodate if both ports are offline. */ @@ -1234,10 +1241,10 @@ static ssize_t etr_mode_code_show(struct sys_device *dev, etr_port0.esw.psc0 : etr_port0.esw.psc1); } -static SYSDEV_ATTR(state_code, 0400, etr_mode_code_show, NULL); +static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); -static ssize_t etr_untuned_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_untuned_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1246,10 +1253,10 @@ static ssize_t etr_untuned_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.u); } -static SYSDEV_ATTR(untuned, 0400, etr_untuned_show, NULL); +static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); -static ssize_t etr_network_id_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_network_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1258,10 +1265,10 @@ static ssize_t etr_network_id_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.net_id); } -static SYSDEV_ATTR(network, 0400, etr_network_id_show, NULL); +static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); -static ssize_t etr_id_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1270,10 +1277,10 @@ static ssize_t etr_id_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.etr_id); } -static SYSDEV_ATTR(id, 0400, etr_id_show, NULL); +static DEVICE_ATTR(id, 0400, etr_id_show, NULL); -static ssize_t etr_port_number_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_port_number_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1282,10 +1289,10 @@ static ssize_t etr_port_number_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.etr_pn); } -static SYSDEV_ATTR(port, 0400, etr_port_number_show, NULL); +static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); -static ssize_t etr_coupled_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_coupled_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1294,10 +1301,10 @@ static ssize_t etr_coupled_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.c); } -static SYSDEV_ATTR(coupled, 0400, etr_coupled_show, NULL); +static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); -static ssize_t etr_local_time_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_local_time_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1306,10 +1313,10 @@ static ssize_t etr_local_time_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.blto); } -static SYSDEV_ATTR(local_time, 0400, etr_local_time_show, NULL); +static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); -static ssize_t etr_utc_offset_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_utc_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1318,64 +1325,64 @@ static ssize_t etr_utc_offset_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.buo); } -static SYSDEV_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); +static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); -static struct sysdev_attribute *etr_port_attributes[] = { - &attr_online, - &attr_stepping_control, - &attr_state_code, - &attr_untuned, - &attr_network, - &attr_id, - &attr_port, - &attr_coupled, - &attr_local_time, - &attr_utc_offset, +static struct device_attribute *etr_port_attributes[] = { + &dev_attr_online, + &dev_attr_stepping_control, + &dev_attr_state_code, + &dev_attr_untuned, + &dev_attr_network, + &dev_attr_id, + &dev_attr_port, + &dev_attr_coupled, + &dev_attr_local_time, + &dev_attr_utc_offset, NULL }; -static int __init etr_register_port(struct sys_device *dev) +static int __init etr_register_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; int rc; - rc = sysdev_register(dev); + rc = device_register(dev); if (rc) goto out; for (attr = etr_port_attributes; *attr; attr++) { - rc = sysdev_create_file(dev, *attr); + rc = device_create_file(dev, *attr); if (rc) goto out_unreg; } return 0; out_unreg: for (; attr >= etr_port_attributes; attr--) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); out: return rc; } -static void __init etr_unregister_port(struct sys_device *dev) +static void __init etr_unregister_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; for (attr = etr_port_attributes; *attr; attr++) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); } static int __init etr_init_sysfs(void) { int rc; - rc = sysdev_class_register(&etr_sysclass); + rc = subsys_system_register(&etr_subsys, NULL); if (rc) goto out; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_port); + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); if (rc) - goto out_unreg_class; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_mode); + goto out_unreg_subsys; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); if (rc) goto out_remove_stepping_port; rc = etr_register_port(&etr_port0_dev); @@ -1389,11 +1396,11 @@ static int __init etr_init_sysfs(void) out_remove_port0: etr_unregister_port(&etr_port0_dev); out_remove_stepping_mode: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_mode); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); out_remove_stepping_port: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_port); -out_unreg_class: - sysdev_class_unregister(&etr_sysclass); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); +out_unreg_subsys: + bus_unregister(&etr_subsys); out: return rc; } @@ -1593,14 +1600,15 @@ out_unlock: } /* - * STP class sysfs interface functions + * STP subsys sysfs interface functions */ -static struct sysdev_class stp_sysclass = { - .name = "stp", +static struct bus_type stp_subsys = { + .name = "stp", + .dev_name = "stp", }; -static ssize_t stp_ctn_id_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_ctn_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1609,10 +1617,10 @@ static ssize_t stp_ctn_id_show(struct sysdev_class *class, *(unsigned long long *) stp_info.ctnid); } -static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); +static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); -static ssize_t stp_ctn_type_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_ctn_type_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1620,10 +1628,10 @@ static ssize_t stp_ctn_type_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.ctn); } -static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); +static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); -static ssize_t stp_dst_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_dst_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x2000)) @@ -1631,10 +1639,10 @@ static ssize_t stp_dst_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); } -static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); +static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); -static ssize_t stp_leap_seconds_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_leap_seconds_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x8000)) @@ -1642,10 +1650,10 @@ static ssize_t stp_leap_seconds_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); } -static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); +static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); -static ssize_t stp_stratum_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_stratum_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1653,10 +1661,10 @@ static ssize_t stp_stratum_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); } -static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL); +static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); -static ssize_t stp_time_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_time_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x0800)) @@ -1664,10 +1672,10 @@ static ssize_t stp_time_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int) stp_info.tto); } -static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL); +static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); -static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_time_zone_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x4000)) @@ -1675,11 +1683,11 @@ static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); } -static SYSDEV_CLASS_ATTR(time_zone_offset, 0400, +static DEVICE_ATTR(time_zone_offset, 0400, stp_time_zone_offset_show, NULL); -static ssize_t stp_timing_mode_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_timing_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1687,10 +1695,10 @@ static ssize_t stp_timing_mode_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.tmd); } -static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); +static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); -static ssize_t stp_timing_state_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_timing_state_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1698,17 +1706,17 @@ static ssize_t stp_timing_state_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.tst); } -static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL); +static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); -static ssize_t stp_online_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_online_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", stp_online); } -static ssize_t stp_online_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_online_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned int value; @@ -1730,47 +1738,47 @@ static ssize_t stp_online_store(struct sysdev_class *class, } /* - * Can't use SYSDEV_CLASS_ATTR because the attribute should be named - * stp/online but attr_online already exists in this file .. + * Can't use DEVICE_ATTR because the attribute should be named + * stp/online but dev_attr_online already exists in this file .. */ -static struct sysdev_class_attribute attr_stp_online = { +static struct device_attribute dev_attr_stp_online = { .attr = { .name = "online", .mode = 0600 }, .show = stp_online_show, .store = stp_online_store, }; -static struct sysdev_class_attribute *stp_attributes[] = { - &attr_ctn_id, - &attr_ctn_type, - &attr_dst_offset, - &attr_leap_seconds, - &attr_stp_online, - &attr_stratum, - &attr_time_offset, - &attr_time_zone_offset, - &attr_timing_mode, - &attr_timing_state, +static struct device_attribute *stp_attributes[] = { + &dev_attr_ctn_id, + &dev_attr_ctn_type, + &dev_attr_dst_offset, + &dev_attr_leap_seconds, + &dev_attr_stp_online, + &dev_attr_stratum, + &dev_attr_time_offset, + &dev_attr_time_zone_offset, + &dev_attr_timing_mode, + &dev_attr_timing_state, NULL }; static int __init stp_init_sysfs(void) { - struct sysdev_class_attribute **attr; + struct device_attribute **attr; int rc; - rc = sysdev_class_register(&stp_sysclass); + rc = subsys_system_register(&stp_subsys, NULL); if (rc) goto out; for (attr = stp_attributes; *attr; attr++) { - rc = sysdev_class_create_file(&stp_sysclass, *attr); + rc = device_create_file(stp_subsys.dev_root, *attr); if (rc) goto out_unreg; } return 0; out_unreg: for (; attr >= stp_attributes; attr--) - sysdev_class_remove_file(&stp_sysclass, *attr); - sysdev_class_unregister(&stp_sysclass); + device_remove_file(stp_subsys.dev_root, *attr); + bus_unregister(&stp_subsys); out: return rc; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 0cd340b7263..7370a41948c 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,22 +1,22 @@ /* - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2011 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/device.h> +#include <linux/workqueue.h> #include <linux/bootmem.h> +#include <linux/cpuset.h> +#include <linux/device.h> +#include <linux/kernel.h> #include <linux/sched.h> -#include <linux/workqueue.h> +#include <linux/init.h> +#include <linux/delay.h> #include <linux/cpu.h> #include <linux/smp.h> -#include <linux/cpuset.h> -#include <asm/delay.h> +#include <linux/mm.h> #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) @@ -31,7 +31,6 @@ struct mask_info { static int topology_enabled = 1; static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; -static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); /* topology_lock protects the core linked list */ @@ -41,11 +40,12 @@ static struct mask_info core_info; cpumask_t cpu_core_map[NR_CPUS]; unsigned char cpu_core_id[NR_CPUS]; -#ifdef CONFIG_SCHED_BOOK static struct mask_info book_info; cpumask_t cpu_book_map[NR_CPUS]; unsigned char cpu_book_id[NR_CPUS]; -#endif + +/* smp_cpu_state_mutex must be held when accessing this array */ +int cpu_polarization[NR_CPUS]; static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { @@ -68,8 +68,10 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) return mask; } -static void add_cpus_to_mask(struct topology_cpu *tl_cpu, - struct mask_info *book, struct mask_info *core) +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *core, + int one_core_per_cpu) { unsigned int cpu; @@ -83,15 +85,19 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu, for_each_present_cpu(lcpu) { if (cpu_logical_map(lcpu) != rcpu) continue; -#ifdef CONFIG_SCHED_BOOK cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; -#endif cpumask_set_cpu(lcpu, &core->mask); - cpu_core_id[lcpu] = core->id; - smp_cpu_polarization[lcpu] = tl_cpu->pp; + if (one_core_per_cpu) { + cpu_core_id[lcpu] = rcpu; + core = core->next; + } else { + cpu_core_id[lcpu] = core->id; + } + cpu_set_polarization(lcpu, tl_cpu->pp); } } + return core; } static void clear_masks(void) @@ -103,13 +109,11 @@ static void clear_masks(void) cpumask_clear(&info->mask); info = info->next; } -#ifdef CONFIG_SCHED_BOOK info = &book_info; while (info) { cpumask_clear(&info->mask); info = info->next; } -#endif } static union topology_entry *next_tle(union topology_entry *tle) @@ -119,43 +123,75 @@ static union topology_entry *next_tle(union topology_entry *tle) return (union topology_entry *)((struct topology_container *)tle + 1); } -static void tl_to_cores(struct sysinfo_15_1_x *info) +static void __tl_to_cores_generic(struct sysinfo_15_1_x *info) { -#ifdef CONFIG_SCHED_BOOK - struct mask_info *book = &book_info; -#else - struct mask_info *book = NULL; -#endif struct mask_info *core = &core_info; + struct mask_info *book = &book_info; union topology_entry *tle, *end; - - spin_lock_irq(&topology_lock); - clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { -#ifdef CONFIG_SCHED_BOOK case 2: book = book->next; book->id = tle->container.id; break; -#endif case 1: core = core->next; core->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, core); + add_cpus_to_mask(&tle->cpu, book, core, 0); break; default: clear_masks(); - goto out; + return; } tle = next_tle(tle); } -out: +} + +static void __tl_to_cores_z10(struct sysinfo_15_1_x *info) +{ + struct mask_info *core = &core_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + core = add_cpus_to_mask(&tle->cpu, book, core, 1); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void tl_to_cores(struct sysinfo_15_1_x *info) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + spin_lock_irq(&topology_lock); + clear_masks(); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + __tl_to_cores_z10(info); + break; + default: + __tl_to_cores_generic(info); + } spin_unlock_irq(&topology_lock); } @@ -165,7 +201,7 @@ static void topology_update_polarization_simple(void) mutex_lock(&smp_cpu_state_mutex); for_each_possible_cpu(cpu) - smp_cpu_polarization[cpu] = POLARIZATION_HRZ; + cpu_set_polarization(cpu, POLARIZATION_HRZ); mutex_unlock(&smp_cpu_state_mutex); } @@ -184,8 +220,7 @@ static int ptf(unsigned long fc) int topology_set_cpu_management(int fc) { - int cpu; - int rc; + int cpu, rc; if (!MACHINE_HAS_TOPOLOGY) return -EOPNOTSUPP; @@ -196,7 +231,7 @@ int topology_set_cpu_management(int fc) if (rc) return -EBUSY; for_each_possible_cpu(cpu) - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); return rc; } @@ -208,29 +243,25 @@ static void update_cpu_core_map(void) spin_lock_irqsave(&topology_lock, flags); for_each_possible_cpu(cpu) { cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); -#ifdef CONFIG_SCHED_BOOK cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); -#endif } spin_unlock_irqrestore(&topology_lock, flags); } void store_topology(struct sysinfo_15_1_x *info) { -#ifdef CONFIG_SCHED_BOOK int rc; rc = stsi(info, 15, 1, 3); if (rc != -ENOSYS) return; -#endif stsi(info, 15, 1, 2); } int arch_update_cpu_topology(void) { struct sysinfo_15_1_x *info = tl_info; - struct sys_device *sysdev; + struct device *dev; int cpu; if (!MACHINE_HAS_TOPOLOGY) { @@ -242,8 +273,8 @@ int arch_update_cpu_topology(void) tl_to_cores(info); update_cpu_core_map(); for_each_online_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + dev = get_cpu_device(cpu); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } return 1; } @@ -265,12 +296,30 @@ static void topology_timer_fn(unsigned long ignored) set_topology_timer(); } +static struct timer_list topology_timer = + TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); + +static atomic_t topology_poll = ATOMIC_INIT(0); + static void set_topology_timer(void) { - topology_timer.function = topology_timer_fn; - topology_timer.data = 0; - topology_timer.expires = jiffies + 60 * HZ; - add_timer(&topology_timer); + if (atomic_add_unless(&topology_poll, -1, 0)) + mod_timer(&topology_timer, jiffies + HZ / 10); + else + mod_timer(&topology_timer, jiffies + HZ * 60); +} + +void topology_expect_change(void) +{ + if (!MACHINE_HAS_TOPOLOGY) + return; + /* This is racy, but it doesn't matter since it is just a heuristic. + * Worst case is that we poll in a higher frequency for a bit longer. + */ + if (atomic_read(&topology_poll) > 60) + return; + atomic_add(60, &topology_poll); + set_topology_timer(); } static int __init early_parse_topology(char *p) @@ -282,25 +331,8 @@ static int __init early_parse_topology(char *p) } early_param("topology", early_parse_topology); -static int __init init_topology_update(void) -{ - int rc; - - rc = 0; - if (!MACHINE_HAS_TOPOLOGY) { - topology_update_polarization_simple(); - goto out; - } - init_timer_deferrable(&topology_timer); - set_topology_timer(); -out: - update_cpu_core_map(); - return rc; -} -__initcall(init_topology_update); - -static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, - int offset) +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) { int i, nr_masks; @@ -326,10 +358,108 @@ void __init s390_init_cpu_topology(void) store_topology(info); pr_info("The CPU configuration topology of the machine is:"); for (i = 0; i < TOPOLOGY_NR_MAG; i++) - printk(" %d", info->mag[i]); - printk(" / %d\n", info->mnest); - alloc_masks(info, &core_info, 2); -#ifdef CONFIG_SCHED_BOOK - alloc_masks(info, &book_info, 3); -#endif + printk(KERN_CONT " %d", info->mag[i]); + printk(KERN_CONT " / %d\n", info->mnest); + alloc_masks(info, &core_info, 1); + alloc_masks(info, &book_info, 2); +} + +static int cpu_management; + +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", cpu_management); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + rc = 0; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == val) + goto out; + rc = topology_set_cpu_management(val); + if (rc) + goto out; + cpu_management = val; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); + +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + switch (cpu_read_polarization(cpu)) { + case POLARIZATION_HRZ: + count = sprintf(buf, "horizontal\n"); + break; + case POLARIZATION_VL: + count = sprintf(buf, "vertical:low\n"); + break; + case POLARIZATION_VM: + count = sprintf(buf, "vertical:medium\n"); + break; + case POLARIZATION_VH: + count = sprintf(buf, "vertical:high\n"); + break; + default: + count = sprintf(buf, "unknown\n"); + break; + } + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); + +static struct attribute *topology_cpu_attrs[] = { + &dev_attr_polarization.attr, + NULL, +}; + +static struct attribute_group topology_cpu_attr_group = { + .attrs = topology_cpu_attrs, +}; + +int topology_cpu_init(struct cpu *cpu) +{ + return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); +} + +static int __init topology_init(void) +{ + if (!MACHINE_HAS_TOPOLOGY) { + topology_update_polarization_simple(); + goto out; + } + set_topology_timer(); +out: + update_cpu_core_map(); + return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } +device_initcall(topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ffabcd9d336..5ce3750b181 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -43,9 +43,9 @@ #include <asm/debug.h> #include "entry.h" -void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long); +void (*pgm_check_table[128])(struct pt_regs *regs); -int show_unhandled_signals; +int show_unhandled_signals = 1; #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) @@ -200,7 +200,7 @@ void show_registers(struct pt_regs *regs) mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); #ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS)); + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); #endif printk("\n%s GPRS: " FOURLONG, mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); @@ -234,7 +234,7 @@ void show_regs(struct pt_regs *regs) static DEFINE_SPINLOCK(die_lock); -void die(const char * str, struct pt_regs * regs, long err) +void die(struct pt_regs *regs, const char *str) { static int die_counter; @@ -243,7 +243,7 @@ void die(const char * str, struct pt_regs * regs, long err) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); #endif @@ -254,7 +254,7 @@ void die(const char * str, struct pt_regs * regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); show_regs(regs); bust_spinlocks(0); add_taint(TAINT_DIE); @@ -267,8 +267,7 @@ void die(const char * str, struct pt_regs * regs, long err) do_exit(SIGSEGV); } -static void inline report_user_fault(struct pt_regs *regs, long int_code, - int signr) +static inline void report_user_fault(struct pt_regs *regs, int signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -276,7 +275,7 @@ static void inline report_user_fault(struct pt_regs *regs, long int_code, return; if (!printk_ratelimit()) return; - printk("User process fault: interruption code 0x%lX ", int_code); + printk("User process fault: interruption code 0x%X ", regs->int_code); print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); printk("\n"); show_regs(regs); @@ -287,19 +286,28 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, - struct pt_regs *regs, siginfo_t *info) +static inline void __user *get_psw_address(struct pt_regs *regs) { - if (notify_die(DIE_TRAP, str, regs, pgm_int_code, - pgm_int_code, signr) == NOTIFY_STOP) + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); +} + +static void __kprobes do_trap(struct pt_regs *regs, + int si_signo, int si_code, char *str) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) return; if (regs->psw.mask & PSW_MASK_PSTATE) { - struct task_struct *tsk = current; - - tsk->thread.trap_no = pgm_int_code & 0xffff; - force_sig_info(signr, info, tsk); - report_user_fault(regs, pgm_int_code, signr); + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = get_psw_address(regs); + force_sig_info(si_signo, &info, current); + report_user_fault(regs, si_signo); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); @@ -311,18 +319,11 @@ static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); if (btt == BUG_TRAP_TYPE_WARN) return; - die(str, regs, pgm_int_code); + die(regs, str); } } } -static inline void __user *get_psw_address(struct pt_regs *regs, - long pgm_int_code) -{ - return (void __user *) - ((regs->psw.addr - (pgm_int_code >> 16)) & PSW_ADDR_INSN); -} - void __kprobes do_per_trap(struct pt_regs *regs) { siginfo_t info; @@ -334,30 +335,24 @@ void __kprobes do_per_trap(struct pt_regs *regs) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_HWBKPT; - info.si_addr = (void *) current->thread.per_event.address; + info.si_addr = + (void __force __user *) current->thread.per_event.address; force_sig_info(SIGTRAP, &info, current); } -static void default_trap_handler(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void default_trap_handler(struct pt_regs *regs) { if (regs->psw.mask & PSW_MASK_PSTATE) { - report_user_fault(regs, pgm_int_code, SIGSEGV); + report_user_fault(regs, SIGSEGV); do_exit(SIGSEGV); } else - die("Unknown program exception", regs, pgm_int_code); + die(regs, "Unknown program exception"); } #define DO_ERROR_INFO(name, signr, sicode, str) \ -static void name(struct pt_regs *regs, long pgm_int_code, \ - unsigned long trans_exc_code) \ +static void name(struct pt_regs *regs) \ { \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = get_psw_address(regs, pgm_int_code); \ - do_trap(pgm_int_code, signr, str, regs, &info); \ + do_trap(regs, signr, sicode, str); \ } DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, @@ -387,42 +382,34 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, "translation exception") -static inline void do_fp_trap(struct pt_regs *regs, void __user *location, - int fpc, long pgm_int_code) +static inline void do_fp_trap(struct pt_regs *regs, int fpc) { - siginfo_t si; - - si.si_signo = SIGFPE; - si.si_errno = 0; - si.si_addr = location; - si.si_code = 0; + int si_code = 0; /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ if (fpc & 0x8000) /* invalid fp operation */ - si.si_code = FPE_FLTINV; + si_code = FPE_FLTINV; else if (fpc & 0x4000) /* div by 0 */ - si.si_code = FPE_FLTDIV; + si_code = FPE_FLTDIV; else if (fpc & 0x2000) /* overflow */ - si.si_code = FPE_FLTOVF; + si_code = FPE_FLTOVF; else if (fpc & 0x1000) /* underflow */ - si.si_code = FPE_FLTUND; + si_code = FPE_FLTUND; else if (fpc & 0x0800) /* inexact */ - si.si_code = FPE_FLTRES; + si_code = FPE_FLTRES; } - do_trap(pgm_int_code, SIGFPE, - "floating point exception", regs, &si); + do_trap(regs, SIGFPE, si_code, "floating point exception"); } -static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void __kprobes illegal_op(struct pt_regs *regs) { siginfo_t info; __u8 opcode[6]; __u16 __user *location; int signal = 0; - location = get_psw_address(regs, pgm_int_code); + location = get_psw_address(regs); if (regs->psw.mask & PSW_MASK_PSTATE) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) @@ -466,44 +453,31 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, * If we get an illegal op in kernel mode, send it through the * kprobes notifier. If kprobes doesn't pick it up, SIGILL */ - if (notify_die(DIE_BPT, "bpt", regs, pgm_int_code, + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } #ifdef CONFIG_MATHEMU if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal == SIGSEGV) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) location; - do_trap(pgm_int_code, signal, - "user address fault", regs, &info); - } else + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal == SIGSEGV) + do_trap(regs, signal, SEGV_MAPERR, "user address fault"); + else #endif - if (signal) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *) location; - do_trap(pgm_int_code, signal, - "illegal operation", regs, &info); - } + if (signal) + do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); } #ifdef CONFIG_MATHEMU -void specification_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void specification_exception(struct pt_regs *regs) { __u8 opcode[6]; __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_psw_address(regs, pgm_int_code); + location = (__u16 __user *) get_psw_address(regs); if (regs->psw.mask & PSW_MASK_PSTATE) { get_user(*((__u16 *) opcode), location); @@ -538,30 +512,21 @@ void specification_exception(struct pt_regs *regs, long pgm_int_code, signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(pgm_int_code, signal, - "specification exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "specification exception"); } #else DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); #endif -static void data_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void data_exception(struct pt_regs *regs) { __u16 __user *location; int signal = 0; - location = get_psw_address(regs, pgm_int_code); + location = get_psw_address(regs); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); @@ -626,32 +591,18 @@ static void data_exception(struct pt_regs *regs, long pgm_int_code, else signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(pgm_int_code, signal, "data exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "data exception"); } -static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void space_switch_exception(struct pt_regs *regs) { - siginfo_t info; - /* Set user psw back to home space mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) regs->psw.mask |= PSW_ASC_HOME; /* Send SIGILL. */ - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_PRVOPC; - info.si_addr = get_psw_address(regs, pgm_int_code); - do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info); + do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } void __kprobes kernel_stack_overflow(struct pt_regs * regs) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 56fe6bc81fe..e4c79ebb40e 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -43,6 +43,8 @@ SECTIONS NOTES :text :note + .dummy : { *(.dummy) } :data + RODATA #ifdef CONFIG_SHARED_KERNEL diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 2d6228f60cd..bb48977f546 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -170,7 +170,8 @@ void __kprobes vtime_stop_cpu(void) psw_t psw; /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; idle->nohz_delay = 0; @@ -183,7 +184,8 @@ void __kprobes vtime_stop_cpu(void) * set_cpu_timer(VTIMER_MAX_SLICE); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. @@ -216,7 +218,8 @@ void __kprobes vtime_stop_cpu(void) * vq->idle = get_cpu_timer(); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. @@ -458,7 +461,7 @@ void add_virt_timer_periodic(void *new) } EXPORT_SYMBOL(add_virt_timer_periodic); -int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) +static int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) { struct vtimer_queue *vq; unsigned long flags; diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9e4c84187cf..8943e82cd4d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,7 +1,7 @@ /* * diag.c - handling diagnose instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2011 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -15,6 +15,34 @@ #include <linux/kvm_host.h> #include "kvm-s390.h" +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = vcpu->arch.sie_block->prefix; + + start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); @@ -42,7 +70,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; @@ -57,6 +85,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; switch (code) { + case 0x10: + return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); case 0x308: diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index c7c51898984..02434543eab 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -132,7 +132,6 @@ static int handle_stop(struct kvm_vcpu *vcpu) int rc = 0; vcpu->stat.exit_stop_request++; - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); spin_lock_bh(&vcpu->arch.local_int.lock); if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; @@ -149,6 +148,8 @@ static int handle_stop(struct kvm_vcpu *vcpu) } if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + atomic_set_mask(CPUSTAT_STOPPED, + &vcpu->arch.sie_block->cpuflags); vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); rc = -EOPNOTSUPP; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c9aeb4b4d0b..278ee009ce6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) + return 1; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; @@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: case KVM_S390_INT_VIRTIO: @@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, exception = 1; break; + case KVM_S390_INT_EXTERNAL_CALL: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); @@ -224,6 +252,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); if (rc == -EFAULT) exception = 1; + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; case KVM_S390_PROGRAM_INT: @@ -522,6 +551,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, break; case KVM_S390_PROGRAM_INT: case KVM_S390_SIGP_STOP: + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: default: kfree(inti); @@ -581,6 +611,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, break; case KVM_S390_SIGP_STOP: case KVM_S390_RESTART: + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); inti->type = s390int->type; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc2b580e27b..d1c44573245 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_external_call", VCPU_STAT(deliver_external_call) }, { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, @@ -64,11 +65,14 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, + { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { NULL } }; @@ -124,6 +128,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: + case KVM_CAP_SYNC_MMU: r = 1; break; default: @@ -175,6 +180,8 @@ int kvm_arch_init_vm(struct kvm *kvm) if (rc) goto out_err; + rc = -ENOMEM; + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; @@ -265,10 +272,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); gmap_enable(vcpu->arch.gmap); + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->arch.guest_acrs); @@ -296,7 +305,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM); + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | + CPUSTAT_SM | + CPUSTAT_STOPPED); vcpu->arch.sie_block->ecb = 6; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) facilities; @@ -312,11 +323,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); - int rc = -ENOMEM; + struct kvm_vcpu *vcpu; + int rc = -EINVAL; + + if (id >= KVM_MAX_VCPUS) + goto out; + rc = -ENOMEM; + + vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); if (!vcpu) - goto out_nomem; + goto out; vcpu->arch.sie_block = (struct kvm_s390_sie_block *) get_zeroed_page(GFP_KERNEL); @@ -352,7 +369,7 @@ out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: kfree(vcpu); -out_nomem: +out: return ERR_PTR(rc); } @@ -386,6 +403,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + restore_access_regs(vcpu->arch.guest_acrs); return 0; } @@ -401,6 +419,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); vcpu->arch.guest_fpregs.fpc = fpu->fpc; + restore_fp_regs(&vcpu->arch.guest_fpregs); return 0; } @@ -415,7 +434,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -488,7 +507,7 @@ rerun_vcpu: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 39162636108..d0263895992 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -336,6 +336,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu) u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0; u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0; struct vm_area_struct *vma; + unsigned long user_address; vcpu->stat.instruction_tprot++; @@ -349,9 +350,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; + /* we must resolve the address without holding the mmap semaphore. + * This is ok since the userspace hypervisor is not supposed to change + * the mapping while the guest queries the memory. Otherwise the guest + * might crash or get wrong info anyway. */ + user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); + down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, - (unsigned long) __guestaddr_to_user(vcpu, address1)); + vma = find_vma(current->mm, user_address); if (!vma) { up_read(¤t->mm->mmap_sem); return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index d6a50c1fb2e..0a7941d74bc 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -31,9 +31,11 @@ #define SIGP_SET_PREFIX 0x0d #define SIGP_STORE_STATUS_ADDR 0x0e #define SIGP_SET_ARCH 0x12 +#define SIGP_SENSE_RUNNING 0x15 /* cpu status bits */ #define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_NOT_RUNNING 0x00000400UL #define SIGP_STAT_INCORRECT_STATE 0x00000200UL #define SIGP_STAT_INVALID_PARAMETER 0x00000100UL #define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL @@ -57,8 +59,8 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, spin_lock(&fi->lock); if (fi->local_int[cpu_addr] == NULL) rc = 3; /* not operational */ - else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_RUNNING) { + else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; rc = 1; /* status stored */ } else { @@ -87,6 +89,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) return -ENOMEM; inti->type = KVM_S390_INT_EMERGENCY; + inti->emerg.code = vcpu->vcpu_id; spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; @@ -103,9 +106,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) wake_up_interruptible(&li->wq); spin_unlock_bh(&li->lock); rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); +unlock: + spin_unlock(&fi->lock); + return rc; +} + +static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EXTERNAL_CALL; + inti->extcall.code = vcpu->vcpu_id; + + spin_lock(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); unlock: spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); return rc; } @@ -212,7 +253,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, spin_lock_bh(&li->lock); /* cpu must be in stopped state */ - if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { rc = 1; /* incorrect state */ *reg &= SIGP_STAT_INCORRECT_STATE; kfree(inti); @@ -236,6 +277,38 @@ out_fi: return rc; } +static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, + unsigned long *reg) +{ + int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else { + if (atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + /* running */ + rc = 1; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_NOT_RUNNING; + rc = 0; + } + } + spin_unlock(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, + rc); + + return rc; +} + int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) { int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; @@ -267,6 +340,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_sense(vcpu, cpu_addr, &vcpu->arch.guest_gprs[r1]); break; + case SIGP_EXTERNAL_CALL: + vcpu->stat.instruction_sigp_external_call++; + rc = __sigp_external_call(vcpu, cpu_addr); + break; case SIGP_EMERGENCY: vcpu->stat.instruction_sigp_emergency++; rc = __sigp_emergency(vcpu, cpu_addr); @@ -288,6 +365,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, &vcpu->arch.guest_gprs[r1]); break; + case SIGP_SENSE_RUNNING: + vcpu->stat.instruction_sigp_sense_running++; + rc = __sigp_sense_running(vcpu, cpu_addr, + &vcpu->arch.guest_gprs[r1]); + break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; /* user space must know about restart */ diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a65229d91c9..db92f044024 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -32,7 +32,8 @@ static void __udelay_disabled(unsigned long long usecs) u64 clock_saved; u64 end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT | + PSW_MASK_EXT | PSW_MASK_MCHECK; end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); __ctl_store(cr0_saved, 0, 0); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 74833831417..342ae35a5ba 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -342,7 +342,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; @@ -378,7 +379,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9564fc779b2..354dd39073e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -125,8 +125,7 @@ static inline int user_space_fault(unsigned long trans_exc_code) return trans_exc_code != 3; } -static inline void report_user_fault(struct pt_regs *regs, long int_code, - int signr, unsigned long address) +static inline void report_user_fault(struct pt_regs *regs, long signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -134,10 +133,12 @@ static inline void report_user_fault(struct pt_regs *regs, long int_code, return; if (!printk_ratelimit()) return; - printk("User process fault: interruption code 0x%lX ", int_code); + printk(KERN_ALERT "User process fault: interruption code 0x%X ", + regs->int_code); print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); - printk("\n"); - printk("failing address: %lX\n", address); + printk(KERN_CONT "\n"); + printk(KERN_ALERT "failing address: %lX\n", + regs->int_parm_long & __FAIL_ADDR_MASK); show_regs(regs); } @@ -145,24 +146,18 @@ static inline void report_user_fault(struct pt_regs *regs, long int_code, * Send SIGSEGV to task. This is an external routine * to keep the stack usage of do_page_fault small. */ -static noinline void do_sigsegv(struct pt_regs *regs, long int_code, - int si_code, unsigned long trans_exc_code) +static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; - unsigned long address; - address = trans_exc_code & __FAIL_ADDR_MASK; - current->thread.prot_addr = address; - current->thread.trap_no = int_code; - report_user_fault(regs, int_code, SIGSEGV, address); + report_user_fault(regs, SIGSEGV); si.si_signo = SIGSEGV; si.si_code = si_code; - si.si_addr = (void __user *) address; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGSEGV, &si, current); } -static noinline void do_no_context(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; unsigned long address; @@ -178,55 +173,48 @@ static noinline void do_no_context(struct pt_regs *regs, long int_code, * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - address = trans_exc_code & __FAIL_ADDR_MASK; - if (!user_space_fault(trans_exc_code)) + address = regs->int_parm_long & __FAIL_ADDR_MASK; + if (!user_space_fault(regs->int_parm_long)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %p\n", (void *)address); else printk(KERN_ALERT "Unable to handle kernel paging request" " at virtual user address %p\n", (void *)address); - die("Oops", regs, int_code); + die(regs, "Oops"); do_exit(SIGKILL); } -static noinline void do_low_address(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_low_address(struct pt_regs *regs) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. */ - die ("Low-address protection", regs, int_code); + die (regs, "Low-address protection"); do_exit(SIGKILL); } - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); } -static noinline void do_sigbus(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_sigbus(struct pt_regs *regs) { struct task_struct *tsk = current; - unsigned long address; struct siginfo si; /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - address = trans_exc_code & __FAIL_ADDR_MASK; - tsk->thread.prot_addr = address; - tsk->thread.trap_no = int_code; si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRERR; - si.si_addr = (void __user *) address; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGBUS, &si, tsk); } -static noinline void do_fault_error(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code, int fault) +static noinline void do_fault_error(struct pt_regs *regs, int fault) { int si_code; @@ -238,24 +226,24 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, /* User mode accesses just cause a SIGSEGV */ si_code = (fault == VM_FAULT_BADMAP) ? SEGV_MAPERR : SEGV_ACCERR; - do_sigsegv(regs, int_code, si_code, trans_exc_code); + do_sigsegv(regs, si_code); return; } case VM_FAULT_BADCONTEXT: - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); break; default: /* fault & VM_FAULT_ERROR */ if (fault & VM_FAULT_OOM) { if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); else pagefault_out_of_memory(); } else if (fault & VM_FAULT_SIGBUS) { /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); else - do_sigbus(regs, int_code, trans_exc_code); + do_sigbus(regs); } else BUG(); break; @@ -273,12 +261,12 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline int do_exception(struct pt_regs *regs, int access, - unsigned long trans_exc_code) +static inline int do_exception(struct pt_regs *regs, int access) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; + unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; @@ -288,6 +276,7 @@ static inline int do_exception(struct pt_regs *regs, int access, tsk = current; mm = tsk->mm; + trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that @@ -307,7 +296,7 @@ static inline int do_exception(struct pt_regs *regs, int access, #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { - address = gmap_fault(address, + address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; @@ -387,45 +376,46 @@ out: return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_protection_exception(struct pt_regs *regs) { + unsigned long trans_exc_code; int fault; + trans_exc_code = regs->int_parm_long; /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr -= (pgm_int_code >> 16); + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ if (unlikely(!(trans_exc_code & 4))) { - do_low_address(regs, pgm_int_code, trans_exc_code); + do_low_address(regs); return; } - fault = do_exception(regs, VM_WRITE, trans_exc_code); + fault = do_exception(regs, VM_WRITE); if (unlikely(fault)) - do_fault_error(regs, 4, trans_exc_code, fault); + do_fault_error(regs, fault); } -void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_dat_exception(struct pt_regs *regs) { int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; - fault = do_exception(regs, access, trans_exc_code); + fault = do_exception(regs, access); if (unlikely(fault)) - do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); + do_fault_error(regs, fault); } #ifdef CONFIG_64BIT -void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_asce_exception(struct pt_regs *regs) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long trans_exc_code; + trans_exc_code = regs->int_parm_long; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; @@ -440,12 +430,12 @@ void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_MASK_PSTATE) { - do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code); + do_sigsegv(regs, SEGV_MAPERR); return; } no_context: - do_no_context(regs, pgm_int_code, trans_exc_code); + do_no_context(regs); } #endif @@ -454,19 +444,20 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) struct pt_regs regs; int access, fault; - regs.psw.mask = psw_kernel_bits; + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; if (!irqs_disabled()) regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) __builtin_return_address(0); regs.psw.addr |= PSW_ADDR_AMODE; - uaddr &= PAGE_MASK; + regs.int_code = pgm_int_code; + regs.int_parm_long = (uaddr & PAGE_MASK) | 2; access = write ? VM_WRITE : VM_READ; - fault = do_exception(®s, access, uaddr | 2); + fault = do_exception(®s, access); if (unlikely(fault)) { if (fault & VM_FAULT_OOM) return -EFAULT; else if (fault & VM_FAULT_SIGBUS) - do_sigbus(®s, pgm_int_code, uaddr); + do_sigbus(®s); } return fault ? -EFAULT : 0; } @@ -509,7 +500,7 @@ int pfault_init(void) .reserved = __PF_RES_FIELD }; int rc; - if (!MACHINE_IS_VM || pfault_disable) + if (pfault_disable) return -1; asm volatile( " diag %1,%0,0x258\n" @@ -530,7 +521,7 @@ void pfault_fini(void) .refversn = 2, }; - if (!MACHINE_IS_VM || pfault_disable) + if (pfault_disable) return; asm volatile( " diag %0,0,0x258\n" @@ -587,8 +578,13 @@ static void pfault_interrupt(unsigned int ext_int_code, } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial - * interrupt doesn't put the task to sleep. */ - tsk->thread.pfault_wait = -1; + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; } put_task_struct(tsk); } else { @@ -638,8 +634,6 @@ static int __init pfault_irq_init(void) { int rc; - if (!MACHINE_IS_VM) - return 0; rc = register_external_interrupt(0x2603, pfault_interrupt); if (rc) goto out_extint; diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 45b405ca256..65cb06e2af4 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -52,7 +52,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page; + struct page *head, *page, *tail; int refs; result = write ? 0 : _SEGMENT_ENTRY_RO; @@ -64,6 +64,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -81,6 +82,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, *nr -= refs; while (refs--) put_page(head); + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 59b663109d9..5d633019d8f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -26,6 +26,7 @@ #include <linux/pfn.h> #include <linux/poison.h> #include <linux/initrd.h> +#include <linux/export.h> #include <linux/gfp.h> #include <asm/processor.h> #include <asm/system.h> @@ -92,18 +93,22 @@ static unsigned long setup_zero_pages(void) void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; - unsigned long pgd_type; + unsigned long pgd_type, asce_bits; init_mm.pgd = swapper_pg_dir; - S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; #ifdef CONFIG_64BIT - /* A three level page table (4TB) is enough for the kernel space. */ - S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; - pgd_type = _REGION3_ENTRY_EMPTY; + if (VMALLOC_END > (1UL << 42)) { + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION2_ENTRY_EMPTY; + } else { + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION3_ENTRY_EMPTY; + } #else - S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; + asce_bits = _ASCE_TABLE_LENGTH; pgd_type = _SEGMENT_ENTRY_EMPTY; #endif + S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 5dbbaa6e594..1cb8427bedf 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/errno.h> +#include <linux/gfp.h> #include <asm/system.h> /* @@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ? -EFAULT : 0; } +/* + * Copy memory in real mode (kernel to kernel) + */ int memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; @@ -101,3 +105,55 @@ void copy_to_absolute_zero(void *dest, void *src, size_t count) __ctl_load(cr0, 0, 0); preempt_enable(); } + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Copy memory from user (virtual) to kernel (real) + */ +int copy_from_user_real(void *dest, void __user *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (copy_from_user(buf, src + offs, size)) + goto out; + if (memcpy_real(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c9a9f7f1818..f09c74881b7 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include <linux/personality.h> #include <linux/mm.h> +#include <linux/mman.h> #include <linux/module.h> #include <linux/random.h> #include <asm/pgalloc.h> diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d013ed39743..b36537a5f43 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/hugetlb.h> +#include <asm/cacheflush.h> #include <asm/pgtable.h> static void change_page_attr(unsigned long addr, int numpages, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5d56c2b95b1..9a4d02f64f1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2007,2009 + * Copyright IBM Corp. 2007,2011 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ @@ -33,17 +33,6 @@ #define FRAG_MASK 0x03 #endif -unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; -EXPORT_SYMBOL(VMALLOC_START); - -static int __init parse_vmalloc(char *arg) -{ - if (!arg) - return -EINVAL; - VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK; - return 0; -} -early_param("vmalloc", parse_vmalloc); unsigned long *crst_table_alloc(struct mm_struct *mm) { @@ -222,6 +211,7 @@ void gmap_free(struct gmap *gmap) /* Free all segment & region tables. */ down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { table = (unsigned long *) page_to_phys(page); if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) @@ -230,6 +220,7 @@ void gmap_free(struct gmap *gmap) gmap_unlink_segment(gmap, table); __free_pages(page, ALLOC_ORDER); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); list_del(&gmap->list); kfree(gmap); @@ -256,25 +247,29 @@ void gmap_disable(struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_disable); +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long init) { struct page *page; unsigned long *new; + /* since we dont free the gmap table until gmap_free we can unlock */ + spin_unlock(&gmap->mm->page_table_lock); page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + spin_lock(&gmap->mm->page_table_lock); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - down_read(&gmap->mm->mmap_sem); if (*table & _REGION_ENTRY_INV) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); } else __free_pages(page, ALLOC_ORDER); - up_read(&gmap->mm->mmap_sem); return 0; } @@ -299,6 +294,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -320,6 +316,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) *table = _SEGMENT_ENTRY_INV; } out: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); @@ -350,6 +347,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -373,19 +371,24 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; out_unmap: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); gmap_unmap_segment(gmap, to, len); return -ENOMEM; } EXPORT_SYMBOL_GPL(gmap_map_segment); -unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) { unsigned long *table, vmaddr, segment; struct mm_struct *mm; @@ -445,16 +448,75 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; rmap->entry = table; + spin_lock(&mm->page_table_lock); list_add(&rmap->list, &mp->mapper); + spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. */ *table = pmd_val(*pmd) & PAGE_MASK; return vmaddr | (address & ~PMD_MASK); } return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + + return rc; } EXPORT_SYMBOL_GPL(gmap_fault); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) { struct gmap_rmap *rmap, *next; @@ -662,8 +724,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) void __tlb_remove_table(void *_table) { - void *table = (void *)((unsigned long) _table & PAGE_MASK); - unsigned type = (unsigned long) _table & ~PAGE_MASK; + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; if (type) __page_table_free_rcu(table, type); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 781ff516956..4799383e2df 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -335,6 +335,9 @@ void __init vmem_map_init(void) ro_start = ((unsigned long)&_stext) & PAGE_MASK; ro_end = PFN_ALIGN((unsigned long)&_eshared); for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; if (start >= ro_end || end <= ro_start) @@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 4552ce40c81..9daee91e6c3 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -22,6 +22,7 @@ #include <asm/irq.h> #include "hwsampler.h" +#include "op_counter.h" #define MAX_NUM_SDB 511 #define MIN_NUM_SDB 1 @@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, if (sample_data_ptr->P == 1) { /* userspace sample */ unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; rcu_read_lock(); tsk = pid_task(find_vpid(pid), PIDTYPE_PID); if (tsk) @@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, rcu_read_unlock(); } else { /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; regs = task_pt_regs(current); } @@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, !sample_data_ptr->P, tsk); mutex_unlock(&hws_sem); - + skip_sample: sample_data_ptr++; } } @@ -994,7 +999,7 @@ allocate_error: * * Returns 0 on success, !0 on failure. */ -int hwsampler_deallocate() +int hwsampler_deallocate(void) { int rc; @@ -1035,7 +1040,7 @@ unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) return cb->sample_overflow; } -int hwsampler_setup() +int hwsampler_setup(void) { int rc; int cpu; @@ -1102,7 +1107,7 @@ setup_exit: return rc; } -int hwsampler_shutdown() +int hwsampler_shutdown(void) { int rc; @@ -1203,7 +1208,7 @@ start_all_exit: * * Returns 0 on success, !0 on failure. */ -int hwsampler_stop_all() +int hwsampler_stop_all(void) { int tmp_rc, rc, cpu; struct hws_cpu_buffer *cb; diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 6efc18b5e60..2297be406c6 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -2,10 +2,11 @@ * arch/s390/oprofile/init.c * * S390 Version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) * * @remark Copyright 2002-2011 OProfile authors */ @@ -14,6 +15,8 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/module.h> +#include <asm/processor.h> #include "../../../drivers/oprofile/oprof.h" @@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); #ifdef CONFIG_64BIT #include "hwsampler.h" +#include "op_counter.h" #define DEFAULT_INTERVAL 4127518 @@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval; static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; -static int hwsampler_file; +static int hwsampler_enabled; static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; static struct oprofile_operations timer_ops; +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + static int oprofile_hwsampler_start(void) { int retval; - hwsampler_running = hwsampler_file; + hwsampler_running = hwsampler_enabled; if (!hwsampler_running) return timer_ops.start(); @@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void) return; } +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + static ssize_t hwsampler_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); } static ssize_t hwsampler_write(struct file *file, char const __user *buf, @@ -88,9 +123,12 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; + if (val != 0 && val != 1) + return -EINVAL; + if (oprofile_started) /* * save to do without locking as we set @@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, */ return -EBUSY; - hwsampler_file = val; + hwsampler_enabled = val; return count; } @@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = { .write = hwsampler_write, }; +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. + */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. */ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. */ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) + struct dentry *root) { - struct dentry *hw_dir; + struct dentry *dir; + + dir = oprofilefs_mkdir(sb, root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; /* reinitialize default values */ - hwsampler_file = 1; + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; - hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); - if (!hw_dir) - return -EINVAL; + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(sb, root, "0"); + if (!dir) + return -EINVAL; - oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_ulong(sb, hw_dir, "hw_interval", - &oprofile_hw_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); + oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(sb, dir, "event", &zero_fops); + oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); + oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); + oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); + oprofilefs_create_file(sb, dir, "user", &user_fops); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. + */ + dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(sb, dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } return 0; } static int oprofile_hwsampler_init(struct oprofile_operations *ops) { + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. + */ + if (force_cpu_type != timer) { + struct cpuid id; + + get_cpu_id (&id); + + switch (id.machine) { + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + default: return -ENODEV; + } + } + if (hwsampler_setup()) return -ENODEV; /* - * create hwsampler files only if hwsampler_setup() succeeds. + * Query the range for the sampling interval from the + * hardware. */ oprofile_min_interval = hwsampler_query_min_interval(); if (oprofile_min_interval == 0) @@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) if (oprofile_hw_interval > oprofile_max_interval) oprofile_hw_interval = oprofile_max_interval; - if (oprofile_timer_init(ops)) - return -ENODEV; - - printk(KERN_INFO "oprofile: using hardware sampling\n"); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); + printk(KERN_INFO "oprofile: System z hardware sampling " + "facility found.\n"); ops->start = oprofile_hwsampler_start; ops->stop = oprofile_hwsampler_stop; - ops->create_files = oprofile_create_hwsampling_files; return 0; } static void oprofile_hwsampler_exit(void) { - oprofile_timer_exit(); hwsampler_shutdown(); } @@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->backtrace = s390_backtrace; #ifdef CONFIG_64BIT - return oprofile_hwsampler_init(ops); + + /* + * -ENODEV is not reported to the caller. The module itself + * will use the timer mode sampling as fallback and this is + * always available. + */ + hwsampler_available = oprofile_hwsampler_init(ops) == 0; + + return 0; #else return -ENODEV; #endif diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h new file mode 100644 index 00000000000..1a8d3ca0901 --- /dev/null +++ b/arch/s390/oprofile/op_counter.h @@ -0,0 +1,23 @@ +/** + * arch/s390/oprofile/op_counter.h + * + * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2011 OProfile authors + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +struct op_counter_config { + /* `enabled' maps to the hwsampler_file variable. */ + /* `count' maps to the oprofile_hw_interval variable. */ + /* `event' and `unit_mask' are unused. */ + unsigned long kernel; + unsigned long user; +}; + +extern struct op_counter_config counter_config; + +#endif /* OP_COUNTER_H */ |