diff options
Diffstat (limited to 'arch/s390')
69 files changed, 2444 insertions, 2188 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index ae4b01060ed..9858476fa0f 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -1,6 +1,7 @@ -obj-y += kernel/ -obj-y += mm/ -obj-y += crypto/ -obj-y += appldata/ -obj-y += hypfs/ -obj-y += kvm/ +obj-y += kernel/ +obj-y += mm/ +obj-$(CONFIG_KVM) += kvm/ +obj-$(CONFIG_CRYPTO_HW) += crypto/ +obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ +obj-$(CONFIG_APPLDATA_BASE) += appldata/ +obj-$(CONFIG_MATHEMU) += math-emu/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a9fbd43395f..d1727584230 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -87,11 +87,13 @@ config S390 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ - select HAVE_GET_USER_PAGES_FAST select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select HAVE_RCU_TABLE_FREE if SMP select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select ARCH_DISCARD_MEMBLOCK select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -191,18 +193,13 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. config SCHED_MC - def_bool y - prompt "Multi-core scheduler support" - depends on SMP - help - Multi-core scheduler support improves the CPU scheduler's decision - making when dealing with multi-core CPU chips at a cost of slightly - increased overhead in some places. + def_bool n config SCHED_BOOK def_bool y prompt "Book scheduler support" - depends on SMP && SCHED_MC + depends on SMP + select SCHED_MC help Book scheduler support improves the CPU scheduler's decision making when dealing with machines that have several books. @@ -345,9 +342,6 @@ config WARN_DYNAMIC_STACK Say N if you are unsure. -config ARCH_POPULATES_NODE_MAP - def_bool y - comment "Kernel preemption" source "kernel/Kconfig.preempt" @@ -572,6 +566,7 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps" depends on 64BIT + select KEXEC help Generate crash dump after being started by kexec. Crash dump kernels are loaded in the main kernel with kexec-tools diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 27a0b5df5ea..e9f35334169 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -99,7 +99,6 @@ core-y += arch/s390/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ -drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ # must be linked after kernel drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 92f1cb745d6..4de031d6b76 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data) j = 0; for_each_online_cpu(i) { os_data->os_cpu[j].per_cpu_user = - cputime_to_jiffies(kstat_cpu(i).cpustat.user); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); os_data->os_cpu[j].per_cpu_nice = - cputime_to_jiffies(kstat_cpu(i).cpustat.nice); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); os_data->os_cpu[j].per_cpu_system = - cputime_to_jiffies(kstat_cpu(i).cpustat.system); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); os_data->os_cpu[j].per_cpu_idle = - cputime_to_jiffies(kstat_cpu(i).cpustat.idle); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); os_data->os_cpu[j].per_cpu_irq = - cputime_to_jiffies(kstat_cpu(i).cpustat.irq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); os_data->os_cpu[j].per_cpu_softirq = - cputime_to_jiffies(kstat_cpu(i).cpustat.softirq); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); os_data->os_cpu[j].per_cpu_iowait = - cputime_to_jiffies(kstat_cpu(i).cpustat.iowait); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); os_data->os_cpu[j].per_cpu_steal = - cputime_to_jiffies(kstat_cpu(i).cpustat.steal); + cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); os_data->os_cpu[j].cpu_id = i; j++; } diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 635d677d328..f2737a005af 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -23,4 +23,4 @@ $(obj)/compressed/vmlinux: FORCE install: $(CONFIGURE) $(obj)/image sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/image \ - System.map Kerntypes "$(INSTALL_PATH)" + System.map "$(INSTALL_PATH)" diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 49676771bd6..ffd1ac255f1 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -368,9 +368,12 @@ static inline int crypt_s390_func_available(int func, if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) return 0; - if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76)) + + if (facility_mask & CRYPT_S390_MSA3 && + (!test_facility(2) || !test_facility(76))) return 0; - if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) + if (facility_mask & CRYPT_S390_MSA4 && + (!test_facility(2) || !test_facility(77))) return 0; switch (func & CRYPT_S390_OP_MASK) { diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 481f4f76f66..8a2a887478c 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -97,7 +97,7 @@ static void hypfs_delete_tree(struct dentry *root) } } -static struct inode *hypfs_make_inode(struct super_block *sb, int mode) +static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) { struct inode *ret = new_inode(sb); @@ -107,7 +107,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; - if (mode & S_IFDIR) + if (S_ISDIR(mode)) set_nlink(ret, 2); } return ret; @@ -259,9 +259,9 @@ static int hypfs_parse_options(char *options, struct super_block *sb) return 0; } -static int hypfs_show_options(struct seq_file *s, struct vfsmount *mnt) +static int hypfs_show_options(struct seq_file *s, struct dentry *root) { - struct hypfs_sb_info *hypfs_info = mnt->mnt_sb->s_fs_info; + struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info; seq_printf(s, ",uid=%u", hypfs_info->uid); seq_printf(s, ",gid=%u", hypfs_info->gid); @@ -333,7 +333,7 @@ static void hypfs_kill_super(struct super_block *sb) static struct dentry *hypfs_create_file(struct super_block *sb, struct dentry *parent, const char *name, - char *data, mode_t mode) + char *data, umode_t mode) { struct dentry *dentry; struct inode *inode; @@ -350,13 +350,13 @@ static struct dentry *hypfs_create_file(struct super_block *sb, dentry = ERR_PTR(-ENOMEM); goto fail; } - if (mode & S_IFREG) { + if (S_ISREG(mode)) { inode->i_fop = &hypfs_file_ops; if (data) inode->i_size = strlen(data); else inode->i_size = 0; - } else if (mode & S_IFDIR) { + } else if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; inc_nlink(parent->d_inode); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 08143487829..c23c3900c30 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -16,114 +16,100 @@ /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long cputime_t; -typedef unsigned long long cputime64_t; +typedef unsigned long long __nocast cputime_t; +typedef unsigned long long __nocast cputime64_t; -#ifndef __s390x__ - -static inline unsigned int -__div(unsigned long long n, unsigned int base) +static inline unsigned long __div(unsigned long long n, unsigned long base) { +#ifndef __s390x__ register_pair rp; rp.pair = n >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1)); return rp.subreg.odd; +#else /* __s390x__ */ + return n / base; +#endif /* __s390x__ */ } -#else /* __s390x__ */ +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime to jiffies and back. + */ +static inline unsigned long cputime_to_jiffies(const cputime_t cputime) +{ + return __div((__force unsigned long long) cputime, 4096000000ULL / HZ); +} -static inline unsigned int -__div(unsigned long long n, unsigned int base) +static inline cputime_t jiffies_to_cputime(const unsigned int jif) { - return n / base; + return (__force cputime_t)(jif * (4096000000ULL / HZ)); } -#endif /* __s390x__ */ +static inline u64 cputime64_to_jiffies64(cputime64_t cputime) +{ + unsigned long long jif = (__force unsigned long long) cputime; + do_div(jif, 4096000000ULL / HZ); + return jif; +} -#define cputime_zero (0ULL) -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_max ((~0UL >> 1) - 1) -#define cputime_add(__a, __b) ((__a) + (__b)) -#define cputime_sub(__a, __b) ((__a) - (__b)) -#define cputime_div(__a, __n) ({ \ - unsigned long long __div = (__a); \ - do_div(__div,__n); \ - __div; \ -}) -#define cputime_halve(__a) ((__a) >> 1) -#define cputime_eq(__a, __b) ((__a) == (__b)) -#define cputime_gt(__a, __b) ((__a) > (__b)) -#define cputime_ge(__a, __b) ((__a) >= (__b)) -#define cputime_lt(__a, __b) ((__a) < (__b)) -#define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) - -#define cputime64_zero (0ULL) -#define cputime64_add(__a, __b) ((__a) + (__b)) -#define cputime_to_cputime64(__ct) (__ct) - -static inline u64 -cputime64_to_jiffies64(cputime64_t cputime) -{ - do_div(cputime, 4096000000ULL / HZ); - return cputime; +static inline cputime64_t jiffies64_to_cputime64(const u64 jif) +{ + return (__force cputime64_t)(jif * (4096000000ULL / HZ)); } /* * Convert cputime to microseconds and back. */ -static inline unsigned int -cputime_to_usecs(const cputime_t cputime) +static inline unsigned int cputime_to_usecs(const cputime_t cputime) { - return cputime_div(cputime, 4096); + return (__force unsigned long long) cputime >> 12; } -static inline cputime_t -usecs_to_cputime(const unsigned int m) +static inline cputime_t usecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 4096; + return (__force cputime_t)(m * 4096ULL); } +#define usecs_to_cputime64(m) usecs_to_cputime(m) + /* * Convert cputime to milliseconds and back. */ -static inline unsigned int -cputime_to_secs(const cputime_t cputime) +static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div(cputime, 2048000000) >> 1; + return __div((__force unsigned long long) cputime, 2048000000) >> 1; } -static inline cputime_t -secs_to_cputime(const unsigned int s) +static inline cputime_t secs_to_cputime(const unsigned int s) { - return (cputime_t) s * 4096000000ULL; + return (__force cputime_t)(s * 4096000000ULL); } /* * Convert cputime to timespec and back. */ -static inline cputime_t -timespec_to_cputime(const struct timespec *value) +static inline cputime_t timespec_to_cputime(const struct timespec *value) { - return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000); } -static inline void -cputime_to_timespec(const cputime_t cputime, struct timespec *value) +static inline void cputime_to_timespec(const cputime_t cputime, + struct timespec *value) { + unsigned long long __cputime = (__force unsigned long long) cputime; #ifndef __s390x__ register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_nsec = rp.subreg.even * 1000 / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } @@ -132,50 +118,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) * Since cputime and timeval have the same resolution (microseconds) * this is easy. */ -static inline cputime_t -timeval_to_cputime(const struct timeval *value) +static inline cputime_t timeval_to_cputime(const struct timeval *value) { - return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; + unsigned long long ret = value->tv_sec * 4096000000ULL; + return (__force cputime_t)(ret + value->tv_usec * 4096ULL); } -static inline void -cputime_to_timeval(const cputime_t cputime, struct timeval *value) +static inline void cputime_to_timeval(const cputime_t cputime, + struct timeval *value) { + unsigned long long __cputime = (__force unsigned long long) cputime; #ifndef __s390x__ register_pair rp; - rp.pair = cputime >> 1; + rp.pair = __cputime >> 1; asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = (cputime % 4096000000ULL) / 4096; - value->tv_sec = cputime / 4096000000ULL; + value->tv_usec = (__cputime % 4096000000ULL) / 4096; + value->tv_sec = __cputime / 4096000000ULL; #endif } /* * Convert cputime to clock and back. */ -static inline clock_t -cputime_to_clock_t(cputime_t cputime) +static inline clock_t cputime_to_clock_t(cputime_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } -static inline cputime_t -clock_t_to_cputime(unsigned long x) +static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (cputime_t) x * (4096000000ULL / USER_HZ); + return (__force cputime_t)(x * (4096000000ULL / USER_HZ)); } /* * Convert cputime64 to clock. */ -static inline clock_t -cputime64_to_clock_t(cputime64_t cputime) +static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { - return cputime_div(cputime, 4096000000ULL / USER_HZ); + unsigned long long clock = (__force unsigned long long) cputime; + do_div(clock, 4096000000ULL / USER_HZ); + return clock; } struct s390_idle_data { diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 18124b75a7a..9d88db1f55d 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -73,7 +73,7 @@ typedef struct debug_info { struct dentry* debugfs_entries[DEBUG_MAX_VIEWS]; struct debug_view* views[DEBUG_MAX_VIEWS]; char name[DEBUG_MAX_NAME_LEN]; - mode_t mode; + umode_t mode; } debug_info_t; typedef int (debug_header_proc_t) (debug_info_t* id, @@ -124,7 +124,7 @@ debug_info_t *debug_register(const char *name, int pages, int nr_areas, int buf_size); debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, - int buf_size, mode_t mode, uid_t uid, + int buf_size, umode_t mode, uid_t uid, gid_t gid); void debug_unregister(debug_info_t* id); diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h index 40db27cd6e6..5c1abd47612 100644 --- a/arch/s390/include/asm/kdebug.h +++ b/arch/s390/include/asm/kdebug.h @@ -22,6 +22,6 @@ enum die_val { DIE_NMI_IPI, }; -extern void die(const char *, struct pt_regs *, long); +extern void die(struct pt_regs *, const char *); #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 24e18473d92..b0c235cb6ad 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -47,7 +47,7 @@ struct sca_block { #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define CPUSTAT_HOST 0x80000000 +#define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 #define CPUSTAT_STOP_INT 0x04000000 @@ -139,6 +139,7 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 instruction_sigp_sense; + u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; u32 instruction_sigp_stop; diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 9e13c7d56cc..707f2306725 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -97,47 +97,52 @@ struct _lowcore { __u32 gpregs_save_area[16]; /* 0x0180 */ __u32 cregs_save_area[16]; /* 0x01c0 */ + /* Save areas. */ + __u32 save_area_sync[8]; /* 0x0200 */ + __u32 save_area_async[8]; /* 0x0220 */ + __u32 save_area_restart[1]; /* 0x0240 */ + __u8 pad_0x0244[0x0248-0x0244]; /* 0x0244 */ + /* Return psws. */ - __u32 save_area[16]; /* 0x0200 */ - psw_t return_psw; /* 0x0240 */ - psw_t return_mcck_psw; /* 0x0248 */ + psw_t return_psw; /* 0x0248 */ + psw_t return_mcck_psw; /* 0x0250 */ /* CPU time accounting values */ - __u64 sync_enter_timer; /* 0x0250 */ - __u64 async_enter_timer; /* 0x0258 */ - __u64 mcck_enter_timer; /* 0x0260 */ - __u64 exit_timer; /* 0x0268 */ - __u64 user_timer; /* 0x0270 */ - __u64 system_timer; /* 0x0278 */ - __u64 steal_timer; /* 0x0280 */ - __u64 last_update_timer; /* 0x0288 */ - __u64 last_update_clock; /* 0x0290 */ + __u64 sync_enter_timer; /* 0x0258 */ + __u64 async_enter_timer; /* 0x0260 */ + __u64 mcck_enter_timer; /* 0x0268 */ + __u64 exit_timer; /* 0x0270 */ + __u64 user_timer; /* 0x0278 */ + __u64 system_timer; /* 0x0280 */ + __u64 steal_timer; /* 0x0288 */ + __u64 last_update_timer; /* 0x0290 */ + __u64 last_update_clock; /* 0x0298 */ /* Current process. */ - __u32 current_task; /* 0x0298 */ - __u32 thread_info; /* 0x029c */ - __u32 kernel_stack; /* 0x02a0 */ + __u32 current_task; /* 0x02a0 */ + __u32 thread_info; /* 0x02a4 */ + __u32 kernel_stack; /* 0x02a8 */ /* Interrupt and panic stack. */ - __u32 async_stack; /* 0x02a4 */ - __u32 panic_stack; /* 0x02a8 */ + __u32 async_stack; /* 0x02ac */ + __u32 panic_stack; /* 0x02b0 */ /* Address space pointer. */ - __u32 kernel_asce; /* 0x02ac */ - __u32 user_asce; /* 0x02b0 */ - __u32 current_pid; /* 0x02b4 */ + __u32 kernel_asce; /* 0x02b4 */ + __u32 user_asce; /* 0x02b8 */ + __u32 current_pid; /* 0x02bc */ /* SMP info area */ - __u32 cpu_nr; /* 0x02b8 */ - __u32 softirq_pending; /* 0x02bc */ - __u32 percpu_offset; /* 0x02c0 */ - __u32 ext_call_fast; /* 0x02c4 */ - __u64 int_clock; /* 0x02c8 */ - __u64 mcck_clock; /* 0x02d0 */ - __u64 clock_comparator; /* 0x02d8 */ - __u32 machine_flags; /* 0x02e0 */ - __u32 ftrace_func; /* 0x02e4 */ - __u8 pad_0x02e8[0x0300-0x02e8]; /* 0x02e8 */ + __u32 cpu_nr; /* 0x02c0 */ + __u32 softirq_pending; /* 0x02c4 */ + __u32 percpu_offset; /* 0x02c8 */ + __u32 ext_call_fast; /* 0x02cc */ + __u64 int_clock; /* 0x02d0 */ + __u64 mcck_clock; /* 0x02d8 */ + __u64 clock_comparator; /* 0x02e0 */ + __u32 machine_flags; /* 0x02e8 */ + __u32 ftrace_func; /* 0x02ec */ + __u8 pad_0x02f8[0x0300-0x02f0]; /* 0x02f0 */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -229,57 +234,62 @@ struct _lowcore { psw_t mcck_new_psw; /* 0x01e0 */ psw_t io_new_psw; /* 0x01f0 */ - /* Entry/exit save area & return psws. */ - __u64 save_area[16]; /* 0x0200 */ - psw_t return_psw; /* 0x0280 */ - psw_t return_mcck_psw; /* 0x0290 */ + /* Save areas. */ + __u64 save_area_sync[8]; /* 0x0200 */ + __u64 save_area_async[8]; /* 0x0240 */ + __u64 save_area_restart[1]; /* 0x0280 */ + __u8 pad_0x0288[0x0290-0x0288]; /* 0x0288 */ + + /* Return psws. */ + psw_t return_psw; /* 0x0290 */ + psw_t return_mcck_psw; /* 0x02a0 */ /* CPU accounting and timing values. */ - __u64 sync_enter_timer; /* 0x02a0 */ - __u64 async_enter_timer; /* 0x02a8 */ - __u64 mcck_enter_timer; /* 0x02b0 */ - __u64 exit_timer; /* 0x02b8 */ - __u64 user_timer; /* 0x02c0 */ - __u64 system_timer; /* 0x02c8 */ - __u64 steal_timer; /* 0x02d0 */ - __u64 last_update_timer; /* 0x02d8 */ - __u64 last_update_clock; /* 0x02e0 */ + __u64 sync_enter_timer; /* 0x02b0 */ + __u64 async_enter_timer; /* 0x02b8 */ + __u64 mcck_enter_timer; /* 0x02c0 */ + __u64 exit_timer; /* 0x02c8 */ + __u64 user_timer; /* 0x02d0 */ + __u64 system_timer; /* 0x02d8 */ + __u64 steal_timer; /* 0x02e0 */ + __u64 last_update_timer; /* 0x02e8 */ + __u64 last_update_clock; /* 0x02f0 */ /* Current process. */ - __u64 current_task; /* 0x02e8 */ - __u64 thread_info; /* 0x02f0 */ - __u64 kernel_stack; /* 0x02f8 */ + __u64 current_task; /* 0x02f8 */ + __u64 thread_info; /* 0x0300 */ + __u64 kernel_stack; /* 0x0308 */ /* Interrupt and panic stack. */ - __u64 async_stack; /* 0x0300 */ - __u64 panic_stack; /* 0x0308 */ + __u64 async_stack; /* 0x0310 */ + __u64 panic_stack; /* 0x0318 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0310 */ - __u64 user_asce; /* 0x0318 */ - __u64 current_pid; /* 0x0320 */ + __u64 kernel_asce; /* 0x0320 */ + __u64 user_asce; /* 0x0328 */ + __u64 current_pid; /* 0x0330 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0328 */ - __u32 softirq_pending; /* 0x032c */ - __u64 percpu_offset; /* 0x0330 */ - __u64 ext_call_fast; /* 0x0338 */ - __u64 int_clock; /* 0x0340 */ - __u64 mcck_clock; /* 0x0348 */ - __u64 clock_comparator; /* 0x0350 */ - __u64 vdso_per_cpu_data; /* 0x0358 */ - __u64 machine_flags; /* 0x0360 */ - __u64 ftrace_func; /* 0x0368 */ - __u64 gmap; /* 0x0370 */ - __u64 cmf_hpp; /* 0x0378 */ + __u32 cpu_nr; /* 0x0338 */ + __u32 softirq_pending; /* 0x033c */ + __u64 percpu_offset; /* 0x0340 */ + __u64 ext_call_fast; /* 0x0348 */ + __u64 int_clock; /* 0x0350 */ + __u64 mcck_clock; /* 0x0358 */ + __u64 clock_comparator; /* 0x0360 */ + __u64 vdso_per_cpu_data; /* 0x0368 */ + __u64 machine_flags; /* 0x0370 */ + __u64 ftrace_func; /* 0x0378 */ + __u64 gmap; /* 0x0380 */ + __u8 pad_0x0388[0x0400-0x0388]; /* 0x0388 */ /* Interrupt response block. */ - __u8 irb[64]; /* 0x0380 */ + __u8 irb[64]; /* 0x0400 */ /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x03c0 */ + __u32 paste[16]; /* 0x0440 */ - __u8 pad_0x0400[0x0e00-0x0400]; /* 0x0400 */ + __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 5325c89a584..0fbd1899c7b 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -19,7 +19,7 @@ #define ARCH_NEEDS_WEAK_PER_CPU #endif -#define arch_irqsafe_cpu_to_op(pcp, val, op) \ +#define arch_this_cpu_to_op(pcp, val, op) \ do { \ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ old__, new__, prev__; \ @@ -41,27 +41,27 @@ do { \ preempt_enable(); \ } while (0) -#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define this_cpu_add_1(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_2(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_4(pcp, val) arch_this_cpu_to_op(pcp, val, +) +#define this_cpu_add_8(pcp, val) arch_this_cpu_to_op(pcp, val, +) -#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define this_cpu_and_1(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_2(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, &) +#define this_cpu_and_8(pcp, val) arch_this_cpu_to_op(pcp, val, &) -#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define this_cpu_or_1(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_2(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_4(pcp, val) arch_this_cpu_to_op(pcp, val, |) +#define this_cpu_or_8(pcp, val) arch_this_cpu_to_op(pcp, val, |) -#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) -#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) -#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) -#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_1(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_2(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_4(pcp, val) arch_this_cpu_to_op(pcp, val, ^) +#define this_cpu_xor_8(pcp, val) arch_this_cpu_to_op(pcp, val, ^) -#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) \ +#define arch_this_cpu_cmpxchg(pcp, oval, nval) \ ({ \ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ ret__; \ @@ -79,10 +79,10 @@ do { \ ret__; \ }) -#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) -#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) -#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) -#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) #include <asm-generic/percpu.h> diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 34ede0ea85a..011358c1b18 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -128,28 +128,11 @@ static inline int is_zero_pfn(unsigned long pfn) * effect, this also makes sure that 64 bit module code cannot be used * as system call address. */ - extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; +extern struct page *vmemmap; -#ifndef __s390x__ -#define VMALLOC_SIZE (96UL << 20) -#define VMALLOC_END 0x7e000000UL -#define VMEM_MAP_END 0x80000000UL -#else /* __s390x__ */ -#define VMALLOC_SIZE (128UL << 30) -#define VMALLOC_END 0x3e000000000UL -#define VMEM_MAP_END 0x40000000000UL -#endif /* __s390x__ */ - -/* - * VMEM_MAX_PHYS is the highest physical address that can be added to the 1:1 - * mapping. This needs to be calculated at compile time since the size of the - * VMEM_MAP is static but the size of struct page can change. - */ -#define VMEM_MAX_PAGES ((VMEM_MAP_END - VMALLOC_END) / sizeof(struct page)) -#define VMEM_MAX_PFN min(VMALLOC_START >> PAGE_SHIFT, VMEM_MAX_PAGES) -#define VMEM_MAX_PHYS ((VMEM_MAX_PFN << PAGE_SHIFT) & ~((16 << 20) - 1)) -#define vmemmap ((struct page *) VMALLOC_END) +#define VMEM_MAX_PHYS ((unsigned long) vmemmap) /* * A 31 bit pagetable entry of S390 has following format: @@ -593,14 +576,16 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) unsigned long address, bits; unsigned char skey; + if (!pte_present(*ptep)) + return pgste; address = pte_val(*ptep) & PAGE_MASK; skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ - if (bits) { - skey ^= bits; - page_set_storage_key(address, skey, 1); - } + if (bits & _PAGE_CHANGED) + page_set_storage_key(address, skey ^ bits, 1); + else if (bits) + page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ /* Get host changed & referenced bits from pgste */ @@ -625,6 +610,8 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) #ifdef CONFIG_PGSTE int young; + if (!pte_present(*ptep)) + return pgste; young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); /* Transfer page referenced bit to pte software bit (host view) */ if (young || (pgste_val(pgste) & RCP_HR_BIT)) @@ -638,13 +625,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) } -static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) { #ifdef CONFIG_PGSTE unsigned long address; unsigned long okey, nkey; - address = pte_val(*ptep) & PAGE_MASK; + if (!pte_present(entry)) + return; + address = pte_val(entry) & PAGE_MASK; okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection bit from pgste */ @@ -712,7 +701,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_set_pte(ptep, pgste); + pgste_set_pte(ptep, pgste, entry); *ptep = entry; pgste_set_unlock(ptep, pgste); } else diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 5f33d37d032..d25843a6a91 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -80,8 +80,6 @@ struct thread_struct { unsigned int acrs[NUM_ACRS]; unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; - unsigned long prot_addr; /* address of protection-excep. */ - unsigned int trap_no; unsigned long gmap_addr; /* address of last gmap fault. */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ @@ -238,7 +236,7 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) /* * Function to drop a processor into disabled wait state */ -static inline void ATTRIB_NORET disabled_wait(unsigned long code) +static inline void __noreturn disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index a65846340d5..56da355678f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -324,7 +324,8 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned int svc_code; + unsigned int int_code; + unsigned long int_parm_long; }; /* diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index e63d13dd3bf..d75c8e78f7e 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -352,7 +352,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues - * @queue_start_poll: polling handlers (one per input queue or NULL) + * @queue_start_poll_array: polling handlers (one per input queue or NULL) * @int_parm: interruption parameter * @input_sbal_addr_array: address of no_input_qs * 128 pointers * @output_sbal_addr_array: address of no_output_qs * 128 pointers @@ -372,7 +372,8 @@ struct qdio_initialize { unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; - void (**queue_start_poll) (struct ccw_device *, int, unsigned long); + void (**queue_start_poll_array) (struct ccw_device *, int, + unsigned long); int scan_threshold; unsigned long int_parm; void **input_sbal_addr_array; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 5a099714df0..097183c7040 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -82,6 +82,7 @@ extern unsigned int user_mode; #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_STCKF (1UL << 15) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -100,6 +101,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_STCKF (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -111,6 +113,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_STCKF (S390_lowcore.machine_flags & MACHINE_FLAG_STCKF) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index e3bffd4e2d6..7040b8567cd 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -56,6 +56,7 @@ enum { ec_schedule = 0, ec_call_function, ec_call_function_single, + ec_stop_cpu, }; /* diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index ab47a69fdf0..c32e9123b40 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -23,7 +23,6 @@ extern void __cpu_die (unsigned int cpu); extern int __cpu_up (unsigned int cpu); extern struct mutex smp_cpu_state_mutex; -extern int smp_cpu_polarization[]; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index fdff1e995c7..67b5c1b14b5 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -70,4 +70,7 @@ #define SO_RXQ_OVFL 40 +#define SO_WIFI_STATUS 41 +#define SCM_WIFI_STATUS SO_WIFI_STATUS + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h index 545d219e6a2..0fb34027d3f 100644 --- a/arch/s390/include/asm/sparsemem.h +++ b/arch/s390/include/asm/sparsemem.h @@ -4,8 +4,8 @@ #ifdef CONFIG_64BIT #define SECTION_SIZE_BITS 28 -#define MAX_PHYSADDR_BITS 42 -#define MAX_PHYSMEM_BITS 42 +#define MAX_PHYSADDR_BITS 46 +#define MAX_PHYSMEM_BITS 46 #else diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index b239ff53b18..fb214dd9b7e 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -27,7 +27,7 @@ static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { return test_tsk_thread_flag(task, TIF_SYSCALL) ? - (regs->svc_code & 0xffff) : -1; + (regs->int_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index ef573c1d71a..d73cc6b6000 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -20,8 +20,6 @@ struct task_struct; -extern int sysctl_userprocess_debug; - extern struct task_struct *__switch_to(void *, void *); extern void update_per_regs(struct task_struct *task); diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a23183423b1..a73038155e0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -102,7 +102,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 20 /* This task is single stepped */ -#define TIF_FREEZE 21 /* thread is freezing for suspend */ #define _TIF_SYSCALL (1<<TIF_SYSCALL) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) @@ -119,7 +118,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) #define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) -#define _TIF_FREEZE (1<<TIF_FREEZE) #ifdef CONFIG_64BIT #define is_32bit_task() (test_thread_flag(TIF_31BIT)) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index d610bef9c5e..c447a27a7fd 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -90,7 +90,7 @@ static inline unsigned long long get_clock_fast(void) { unsigned long long clk; - if (test_facility(25)) + if (MACHINE_HAS_STCKF) asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); else clk = get_clock(); diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 005d77d8ae2..0837de80c35 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -4,6 +4,10 @@ #include <linux/cpumask.h> #include <asm/sysinfo.h> +struct cpu; + +#ifdef CONFIG_SCHED_BOOK + extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; @@ -16,8 +20,6 @@ static inline const struct cpumask *cpu_coregroup_mask(int cpu) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define mc_capable() (1) -#ifdef CONFIG_SCHED_BOOK - extern unsigned char cpu_book_id[NR_CPUS]; extern cpumask_t cpu_book_map[NR_CPUS]; @@ -29,19 +31,45 @@ static inline const struct cpumask *cpu_book_mask(int cpu) #define topology_book_id(cpu) (cpu_book_id[cpu]) #define topology_book_cpumask(cpu) (&cpu_book_map[cpu]) -#endif /* CONFIG_SCHED_BOOK */ - +int topology_cpu_init(struct cpu *); int topology_set_cpu_management(int fc); void topology_schedule_update(void); void store_topology(struct sysinfo_15_1_x *info); +void topology_expect_change(void); + +#else /* CONFIG_SCHED_BOOK */ + +static inline void topology_schedule_update(void) { } +static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline void topology_expect_change(void) { } -#define POLARIZATION_UNKNWN (-1) +#endif /* CONFIG_SCHED_BOOK */ + +#define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) #define POLARIZATION_VL (1) #define POLARIZATION_VM (2) #define POLARIZATION_VH (3) -#ifdef CONFIG_SMP +extern int cpu_polarization[]; + +static inline void cpu_set_polarization(int cpu, int val) +{ +#ifdef CONFIG_SCHED_BOOK + cpu_polarization[cpu] = val; +#endif +} + +static inline int cpu_read_polarization(int cpu) +{ +#ifdef CONFIG_SCHED_BOOK + return cpu_polarization[cpu]; +#else + return POLARIZATION_HRZ; +#endif +} + +#ifdef CONFIG_SCHED_BOOK void s390_init_cpu_topology(void); #else static inline void s390_init_cpu_topology(void) diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h index eeb52ccf499..05ebbcdbbf6 100644 --- a/arch/s390/include/asm/types.h +++ b/arch/s390/include/asm/types.h @@ -13,8 +13,6 @@ #ifndef __ASSEMBLY__ -typedef unsigned short umode_t; - /* A address type so that arithmetic can be done on it & it can be upgraded to 64 bit when necessary */ diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 404bdb9671b..8a8008fe7b8 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -277,7 +277,9 @@ #define __NR_clock_adjtime 337 #define __NR_syncfs 338 #define __NR_setns 339 -#define NR_syscalls 340 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define NR_syscalls 342 /* * There are some system calls that are not present on 64 bit, some @@ -396,6 +398,7 @@ #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dd4f0764091..7d9ec924e7e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -32,7 +32,8 @@ extra-y += head.o init_task.o vmlinux.lds extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o) obj-$(CONFIG_MODULES) += s390_ksyms.o module.o -obj-$(CONFIG_SMP) += smp.o topology.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SCHED_BOOK) += topology.o obj-$(CONFIG_SMP) += $(if $(CONFIG_64BIT),switch_cpu64.o, \ switch_cpu.o) obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 751318765e2..6e6a72e66d6 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -45,7 +45,8 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_SVC_CODE, offsetof(struct pt_regs, svc_code)); + DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); + DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); @@ -108,7 +109,9 @@ int main(void) DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); - DEFINE(__LC_SAVE_AREA, offsetof(struct _lowcore, save_area)); + DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); + DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); + DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); @@ -150,7 +153,6 @@ int main(void) DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); - DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); #endif /* CONFIG_32BIT */ return 0; diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f8828d38fa6..3aa4d00aaf5 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -33,7 +33,7 @@ s390_base_mcck_handler_fn: .previous ENTRY(s390_base_ext_handler) - stmg %r0,%r15,__LC_SAVE_AREA + stmg %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn @@ -41,7 +41,7 @@ ENTRY(s390_base_ext_handler) ltgr %r1,%r1 jz 1f basr %r14,%r1 -1: lmg %r0,%r15,__LC_SAVE_AREA +1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW @@ -53,7 +53,7 @@ s390_base_ext_handler_fn: .previous ENTRY(s390_base_pgm_handler) - stmg %r0,%r15,__LC_SAVE_AREA + stmg %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn @@ -61,7 +61,7 @@ ENTRY(s390_base_pgm_handler) ltgr %r1,%r1 jz 1f basr %r14,%r1 - lmg %r0,%r15,__LC_SAVE_AREA + lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -142,7 +142,7 @@ s390_base_mcck_handler_fn: .previous ENTRY(s390_base_ext_handler) - stm %r0,%r15,__LC_SAVE_AREA + stm %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -150,7 +150,7 @@ ENTRY(s390_base_ext_handler) ltr %r1,%r1 jz 1f basr %r14,%r1 -1: lm %r0,%r15,__LC_SAVE_AREA +1: lm %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpsw __LC_EXT_OLD_PSW @@ -164,7 +164,7 @@ s390_base_ext_handler_fn: .previous ENTRY(s390_base_pgm_handler) - stm %r0,%r15,__LC_SAVE_AREA + stm %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 0: ahi %r15,-STACK_FRAME_OVERHEAD l %r1,2f-0b(%r13) @@ -172,7 +172,7 @@ ENTRY(s390_base_pgm_handler) ltr %r1,%r1 jz 1f basr %r14,%r1 - lm %r0,%r15,__LC_SAVE_AREA + lm %r0,%r15,__LC_SAVE_AREA_SYNC lpsw __LC_PGM_OLD_PSW 1: lpsw disabled_wait_psw-0b(%r13) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 84a98289844..ab64bdbab2a 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -278,9 +278,6 @@ asmlinkage long sys32_ipc(u32 call, int first, int second, int third, u32 ptr) { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - - call &= 0xffff; - switch (call) { case SEMTIMEDOP: return compat_sys_semtimedop(first, compat_ptr(ptr), diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 4f68c81d3ff..6fe78c2f95d 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -501,8 +501,12 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + } /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) @@ -544,9 +548,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, (u16 __force __user *)(frame->retcode)); } diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 5006a1d9f5d..18c51df9fe0 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1627,3 +1627,23 @@ ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns + +ENTRY(compat_sys_process_vm_readv_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_readv + +ENTRY(compat_sys_process_vm_writev_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_writev diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 5ad6bc078bf..6848828b962 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -74,7 +74,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); static debug_info_t *debug_info_create(const char *name, int pages_per_area, - int nr_areas, int buf_size, mode_t mode); + int nr_areas, int buf_size, umode_t mode); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t * id, @@ -330,7 +330,7 @@ debug_info_free(debug_info_t* db_info){ static debug_info_t* debug_info_create(const char *name, int pages_per_area, int nr_areas, - int buf_size, mode_t mode) + int buf_size, umode_t mode) { debug_info_t* rc; @@ -688,7 +688,7 @@ debug_close(struct inode *inode, struct file *file) */ debug_info_t *debug_register_mode(const char *name, int pages_per_area, - int nr_areas, int buf_size, mode_t mode, + int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid) { debug_info_t *rc = NULL; @@ -1090,7 +1090,7 @@ debug_register_view(debug_info_t * id, struct debug_view *view) int rc = 0; int i; unsigned long flags; - mode_t mode; + umode_t mode; struct dentry *pde; if (!id) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 45df6d456aa..e2f847599c8 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1578,10 +1578,15 @@ void show_code(struct pt_regs *regs) ptr += sprintf(ptr, "%s Code:", mode); hops = 0; while (start < end && hops < 8) { - *ptr++ = (start == 32) ? '>' : ' '; + opsize = insn_length(code[start]); + if (start + opsize == 32) + *ptr++ = '#'; + else if (start == 32) + *ptr++ = '>'; + else + *ptr++ = ' '; addr = regs->psw.addr + start - 32; ptr += sprintf(ptr, ONELONG, addr); - opsize = insn_length(code[start]); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 37394b3413e..52098d6dfaa 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -390,6 +390,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + if (test_facility(25)) + S390_lowcore.machine_flags |= MACHINE_FLAG_STCKF; #endif } @@ -432,18 +434,22 @@ static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) } } -static void __init setup_boot_command_line(void) +static inline int has_ebcdic_char(const char *str) { int i; - /* convert arch command line to ascii */ - for (i = 0; i < ARCH_COMMAND_LINE_SIZE; i++) - if (COMMAND_LINE[i] & 0x80) - break; - if (i < ARCH_COMMAND_LINE_SIZE) - EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); - COMMAND_LINE[ARCH_COMMAND_LINE_SIZE-1] = 0; + for (i = 0; str[i]; i++) + if (str[i] & 0x80) + return 1; + return 0; +} +static void __init setup_boot_command_line(void) +{ + COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0; + /* convert arch command line to ascii if necessary */ + if (has_ebcdic_char(COMMAND_LINE)) + EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); /* copy arch command line */ strlcpy(boot_command_line, strstrip(COMMAND_LINE), ARCH_COMMAND_LINE_SIZE); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index b13157057e0..3705700ed37 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -19,32 +19,22 @@ #include <asm/unistd.h> #include <asm/page.h> -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 4 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 12 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 20 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 28 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 36 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 44 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 4 +__PT_R2 = __PT_GPRS + 8 +__PT_R3 = __PT_GPRS + 12 +__PT_R4 = __PT_GPRS + 16 +__PT_R5 = __PT_GPRS + 20 +__PT_R6 = __PT_GPRS + 24 +__PT_R7 = __PT_GPRS + 28 +__PT_R8 = __PT_GPRS + 32 +__PT_R9 = __PT_GPRS + 36 +__PT_R10 = __PT_GPRS + 40 +__PT_R11 = __PT_GPRS + 44 +__PT_R12 = __PT_GPRS + 48 +__PT_R13 = __PT_GPRS + 524 +__PT_R14 = __PT_GPRS + 56 +__PT_R15 = __PT_GPRS + 60 _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING | _TIF_PER_TRAP ) @@ -58,133 +48,91 @@ STACK_SIZE = 1 << STACK_SHIFT #define BASED(name) name-system_call(%r13) -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 - l %r1,BASED(.Ltrace_irq_on_caller) - basr %r14,%r1 + l %r1,BASED(.Lhardirqs_on) + basr %r14,%r1 # call trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 - l %r1,BASED(.Ltrace_irq_off_caller) - basr %r14,%r1 - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF + l %r1,BASED(.Lhardirqs_off) + basr %r14,%r1 # call trace_hardirqs_off_caller #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 l %r1,BASED(.Llockdep_sys_exit) - basr %r14,%r1 -0: - .endm -#else -#define LOCKDEP_SYS_EXIT + basr %r14,%r1 # call lockdep_sys_exit #endif - -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ - - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lm %r10,%r11,\lc_from - sl %r10,\lc_to - sl %r11,\lc_to+4 - bc 3,BASED(0f) - sl %r10,BASED(.Lc_1) -0: al %r10,\lc_sum - al %r11,\lc_sum+4 - bc 12,BASED(1f) - al %r10,BASED(.Lc_1) -1: stm %r10,%r11,\lc_sum - .endm - - .macro SAVE_ALL_SVC psworg,savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 - l %r15,__LC_KERNEL_STACK # problem state -> load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - .endm - - .macro SAVE_ALL_BASE savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 .endm - .macro SAVE_ALL_PGM psworg,savearea - tm \psworg+1,0x01 # test problem state bit + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - bnz BASED(1f) - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bnz BASED(2f) - la %r12,\psworg - b BASED(stack_overflow) -#else - bz BASED(2f) + tml %r15,\stacksize - CONFIG_STACK_GUARD + la %r14,\savearea + jz stack_overflow #endif -1: l %r15,__LC_KERNEL_STACK # problem state -> load ksp -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw .endm - .macro SAVE_ALL_ASYNC psworg,savearea - stm %r12,%r15,\savearea - l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - bnz BASED(1f) # from user -> load async stack - clc \psworg+4(4),BASED(.Lcritical_end) - bhe BASED(0f) - clc \psworg+4(4),BASED(.Lcritical_start) - bl BASED(0f) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 - tm 1(%r12),0x01 # retest problem state after cleanup - bnz BASED(1f) -0: l %r14,__LC_ASYNC_STACK # are we already on the async stack ? + .macro SWITCH_ASYNC savearea,stack,shift + tmh %r8,0x0001 # interrupting from user ? + jnz 1f + lr %r14,%r9 + sl %r14,BASED(.Lcritical_start) + cl %r14,BASED(.Lcritical_length) + jhe 0f + la %r11,\savearea # inside critical section, do cleanup + bras %r14,cleanup_critical + tmh %r8,0x0001 # retest problem state after cleanup + jnz 1f +0: l %r14,\stack # are we already on the target stack? slr %r14,%r15 - sra %r14,STACK_SHIFT -#ifdef CONFIG_CHECK_STACK - bnz BASED(1f) - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bnz BASED(2f) - b BASED(stack_overflow) -#else - bz BASED(2f) -#endif -1: l %r15,__LC_ASYNC_STACK -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw + sra %r14,\shift + jnz 1f + CHECK_STACK 1<<\shift,\savearea + j 2f +1: l %r15,\stack # load target stack +2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro CREATE_STACK_FRAME savearea - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack + .macro ADD64 high,low,timer + al \high,\timer + al \low,\timer+4 + brc 12,.+8 + ahi \high,1 .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(8),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - stpt __LC_EXIT_TIMER - lpsw \psworg # back to caller + .macro SUB64 high,low,timer + sl \high,\timer + sl \low,\timer+4 + brc 3,.+8 + ahi \high,-1 + .endm + + .macro UPDATE_VTIME high,low,enter_timer + lm \high,\low,__LC_EXIT_TIMER + SUB64 \high,\low,\enter_timer + ADD64 \high,\low,__LC_USER_TIMER + stm \high,\low,__LC_USER_TIMER + lm \high,\low,__LC_LAST_UPDATE_TIMER + SUB64 \high,\low,__LC_EXIT_TIMER + ADD64 \high,\low,__LC_SYSTEM_TIMER + stm \high,\low,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm .macro REENABLE_IRQS - mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) - ni __SF_EMPTY(%r15),0xbf - ssm __SF_EMPTY(%r15) + st %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm .section .kprobes.text, "ax" @@ -197,14 +145,13 @@ STACK_SIZE = 1 << STACK_SHIFT * gpr2 = prev */ ENTRY(__switch_to) - basr %r1,0 -0: l %r4,__THREAD_info(%r2) # get thread_info of prev + l %r4,__THREAD_info(%r2) # get thread_info of prev l %r5,__THREAD_info(%r3) # get thread_info of next tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? - bz 1f-0b(%r1) + jz 0f ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next -1: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task +0: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task st %r15,__THREAD_ksp(%r2) # store kernel stack of prev l %r15,__THREAD_ksp(%r3) # load kernel stack of next lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 @@ -224,48 +171,55 @@ __critical_start: ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - oi __TI_flags+3(%r12),_TIF_SYSCALL +sysc_stm: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 +sysc_per: + l %r15,__LC_KERNEL_STACK + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(8,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: - xr %r7,%r7 - icm %r7,3,SP_SVC_CODE+2(%r15)# load svc number and test for svc 0 - bnz BASED(sysc_nr_ok) # svc number > 0 + oi __TI_flags+3(%r12),_TIF_SYSCALL + lh %r8,__PT_INT_CODE+2(%r11) + sla %r8,2 # shift and test for svc0 + jnz sysc_nr_ok # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) - bnl BASED(sysc_nr_ok) - sth %r1,SP_SVC_CODE+2(%r15) - lr %r7,%r1 # copy svc number to %r7 + jnl sysc_nr_ok + sth %r1,__PT_INT_CODE+2(%r11) + lr %r8,%r1 + sla %r8,2 sysc_nr_ok: - sll %r7,2 # svc number *4 - l %r10,BASED(.Lsysc_table) + l %r10,BASED(.Lsys_call_table) # 31 bit system call table + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + st %r2,__PT_ORIG_GPR2(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r9,0(%r8,%r10) # get system call addr. tm __TI_flags+2(%r12),_TIF_TRACE >> 8 - mvc SP_ARGS(4,%r15),SP_R7(%r15) - l %r8,0(%r7,%r10) # get system call addr. - bnz BASED(sysc_tracesys) - basr %r14,%r8 # call sys_xxxx - st %r2,SP_R2(%r15) # store return value (change R2 on stack) + jnz sysc_tracesys + basr %r14,%r9 # call sys_xxxx + st %r2,__PT_R2(%r11) # store return value sysc_return: LOCKDEP_SYS_EXIT sysc_tif: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+3(%r12),_TIF_WORK_SVC - bnz BASED(sysc_work) # there is work to do (signals etc.) + jnz sysc_work # check for work ni __TI_flags+3(%r12),255-_TIF_SYSCALL sysc_restore: - RESTORE_ALL __LC_RETURN_PSW,1 + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW sysc_done: # @@ -273,16 +227,16 @@ sysc_done: # sysc_work: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bo BASED(sysc_mcck_pending) + jo sysc_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bo BASED(sysc_reschedule) + jo sysc_reschedule tm __TI_flags+3(%r12),_TIF_SIGPENDING - bo BASED(sysc_sigpending) + jo sysc_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - bo BASED(sysc_notify_resume) + jo sysc_notify_resume tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) # beware of critical section cleanup + jo sysc_singlestep + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule @@ -290,13 +244,13 @@ sysc_work: sysc_reschedule: l %r1,BASED(.Lschedule) la %r14,BASED(sysc_return) - br %r1 # call scheduler + br %r1 # call schedule # # _TIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - l %r1,BASED(.Ls390_handle_mcck) + l %r1,BASED(.Lhandle_mcck) la %r14,BASED(sysc_return) br %r1 # TIF bit will be cleared by handler @@ -305,23 +259,24 @@ sysc_mcck_pending: # sysc_sigpending: ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal tm __TI_flags+3(%r12),_TIF_SYSCALL - bno BASED(sysc_return) - lm %r2,%r6,SP_R2(%r15) # load svc arguments - xr %r7,%r7 # svc 0 returns -ENOSYS - clc SP_SVC_CODE+2(2,%r15),BASED(.Lnr_syscalls+2) - bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 - icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number - b BASED(sysc_nr_ok) # restart svc + jno sysc_return + lm %r2,%r7,__PT_R2(%r11) # load svc arguments + xr %r8,%r8 # svc 0 returns -ENOSYS + clc __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2) + jnl sysc_nr_ok # invalid svc number -> do svc 0 + lh %r8,__PT_INT_CODE+2(%r11) # load new svc number + sla %r8,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume # sysc_notify_resume: - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs l %r1,BASED(.Ldo_notify_resume) la %r14,BASED(sysc_return) br %r1 # call do_notify_resume @@ -331,56 +286,57 @@ sysc_notify_resume: # sysc_singlestep: ni __TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_return) # load adr. of system return - br %r1 # branch to do_per_trap + lr %r2,%r11 # pass pointer to pt_regs + l %r1,BASED(.Ldo_per_trap) + la %r14,BASED(sysc_return) + br %r1 # call do_per_trap # # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before # and after the system call # sysc_tracesys: - l %r1,BASED(.Ltrace_entry) - la %r2,SP_PTREGS(%r15) # load pt_regs + l %r1,BASED(.Ltrace_enter) + lr %r2,%r11 # pass pointer to pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVC_CODE(%r15) - st %r0,SP_R2(%r15) - basr %r14,%r1 + icm %r0,3,__PT_INT_CODE+2(%r11) + st %r0,__PT_R2(%r11) + basr %r14,%r1 # call do_syscall_trace_enter cl %r2,BASED(.Lnr_syscalls) - bnl BASED(sysc_tracenogo) - lr %r7,%r2 - sll %r7,2 # svc number *4 - l %r8,0(%r7,%r10) + jnl sysc_tracenogo + lr %r8,%r2 + sll %r8,2 + l %r9,0(%r8,%r10) sysc_tracego: - lm %r3,%r6,SP_R3(%r15) - mvc SP_ARGS(4,%r15),SP_R7(%r15) - l %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - st %r2,SP_R2(%r15) # store return value + lm %r3,%r7,__PT_R3(%r11) + st %r7,STACK_FRAME_OVERHEAD(%r15) + l %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + st %r2,__PT_R2(%r11) # store return value sysc_tracenogo: tm __TI_flags+2(%r12),_TIF_TRACE >> 8 - bz BASED(sysc_return) + jz sysc_return l %r1,BASED(.Ltrace_exit) - la %r2,SP_PTREGS(%r15) # load pt_regs + lr %r2,%r11 # pass pointer to pt_regs la %r14,BASED(sysc_return) - br %r1 + br %r1 # call do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # ENTRY(ret_from_fork) + la %r11,STACK_FRAME_OVERHEAD(%r15) + l %r12,__LC_THREAD_INFO l %r13,__LC_SVC_NEW_PSW+4 - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? - bo BASED(0f) - st %r15,SP_R15(%r15) # store stack pointer for new kthread -0: l %r1,BASED(.Lschedtail) - basr %r14,%r1 + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? + jo 0f + st %r15,__PT_R15(%r11) # store stack pointer for new kthread +0: l %r1,BASED(.Lschedule_tail) + basr %r14,%r1 # call schedule_tail TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_tracenogo) + ssm __LC_SVC_NEW_PSW # reenable interrupts + j sysc_tracenogo # # kernel_execve function needs to deal with pt_regs that is not @@ -390,153 +346,98 @@ ENTRY(kernel_execve) stm %r12,%r15,48(%r15) lr %r14,%r15 l %r13,__LC_SVC_NEW_PSW+4 - s %r15,BASED(.Lc_spsize) + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) st %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) + la %r12,STACK_FRAME_OVERHEAD(%r15) xc 0(__PT_SIZE,%r12),0(%r12) l %r1,BASED(.Ldo_execve) lr %r5,%r12 - basr %r14,%r1 + basr %r14,%r1 # call do_execve ltr %r2,%r2 - be BASED(0f) - a %r15,BASED(.Lc_spsize) + je 0f + ahi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE) lm %r12,%r15,48(%r15) br %r14 # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts +0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts l %r15,__LC_KERNEL_STACK # load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs + ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs l %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts l %r1,BASED(.Lexecve_tail) - basr %r14,%r1 - b BASED(sysc_return) + basr %r14,%r1 # call execve_tail + j sysc_return /* * Program check handler routine */ ENTRY(pgm_check_handler) -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ stpt __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - bnz BASED(pgm_per) # got per exception -> special case - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime: - l %r3,__LC_PGM_ILC # load program interruption code - l %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - la %r8,0x7f - nr %r8,%r3 - sll %r8,2 - l %r1,BASED(.Ljump_table) - l %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to interrupt-handler -pgm_exit: - b BASED(sysc_return) - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - bnz BASED(pgm_per_std) # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(8),__LC_SVC_NEW_PSW - be BASED(pgm_svcper) -# no interesting special case, ignore PER event - lm %r12,%r15,__LC_SAVE_AREA - lpsw 0x28 - -# -# Normal per exception -# -pgm_per_std: - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(pgm_no_vtime2) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -pgm_no_vtime2: + stm %r8,%r15,__LC_SAVE_AREA_SYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_PGM_OLD_PSW + tmh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + j 2f +1: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + l %r15,__LC_KERNEL_STACK +2: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_SYNC + stm %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f l %r1,__TI_task(%r12) - tm SP_PSW+1(%r15),0x01 # kernel per event ? - bz BASED(kernel_per) - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + tmh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __TI_flags+3(%r12),_TIF_PER_TRAP mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - l %r3,__LC_PGM_ILC # load program interruption code - l %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - la %r8,0x7f - nr %r8,%r3 # clear per-event-bit and ilc - be BASED(pgm_exit2) # only per or per+check ? - sll %r8,2 +0: REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) l %r1,BASED(.Ljump_table) - l %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area + la %r10,0x7f + n %r10,__PT_INT_CODE(%r11) + je sysc_return + sll %r10,2 + l %r1,0(%r10,%r1) # load address of handler routine + lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # branch to interrupt-handler -pgm_exit2: - b BASED(sysc_return) + j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# PER event in supervisor state, must be kprobes # -pgm_svcper: - SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - oi __TI_flags+3(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - l %r8,__TI_task(%r12) - mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE - mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - lm %r2,%r6,SP_R2(%r15) # load svc arguments - b BASED(sysc_do_svc) +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_per_trap) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_per_trap + j sysc_return # -# per was called from kernel, must be kprobes +# single stepped system call # -kernel_per: - REENABLE_IRQS - la %r2,SP_PTREGS(%r15) # address of register-save area - l %r1,BASED(.Lhandle_per) # load adr. of per handler - basr %r14,%r1 # branch to do_single_step - b BASED(pgm_exit) +pgm_svcper: + oi __TI_flags+3(%r12),_TIF_PER_TRAP + mvc __LC_RETURN_PSW(4),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW+4(4),BASED(.Lsysc_per) + lpsw __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine @@ -545,28 +446,35 @@ kernel_per: ENTRY(io_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_SAVE_AREA+16 - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(io_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -io_no_vtime: + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_IO_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz io_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +io_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF - l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to standard irq handler + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_IRQ) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_IRQ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: tm __TI_flags+3(%r12),_TIF_WORK_INT - bnz BASED(io_work) # there is work to do (signals etc.) + jnz io_work # there is work to do (signals etc.) io_restore: - RESTORE_ALL __LC_RETURN_PSW,0 + mvc __LC_RETURN_PSW(8),__PT_PSW(%r11) + ni __LC_RETURN_PSW+1,0xfd # clean wait state bit + stpt __LC_EXIT_TIMER + lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_PSW io_done: # @@ -577,28 +485,29 @@ io_done: # Before any work can be done, a switch to the kernel stack is required. # io_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bo BASED(io_work_user) # yes -> do resched & signal + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling icm %r0,15,__TI_precount(%r12) - bnz BASED(io_restore) # preemption disabled + jnz io_restore # preemption disabled tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bno BASED(io_restore) + jno io_restore # switch to kernel stack - l %r1,SP_R15(%r15) - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + l %r1,__PT_R15(%r11) + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 # TRACE_IRQS_ON already done at io_return, call # TRACE_IRQS_OFF to keep things symmetrical TRACE_IRQS_OFF - l %r1,BASED(.Lpreempt_schedule_irq) + l %r1,BASED(.Lpreempt_irq) basr %r14,%r1 # call preempt_schedule_irq - b BASED(io_return) + j io_return #else - b BASED(io_restore) + j io_restore #endif # @@ -606,9 +515,10 @@ io_work: # io_work_user: l %r1,__LC_KERNEL_STACK - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lr %r15,%r1 # @@ -618,24 +528,24 @@ io_work_user: # io_work_tif: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bo BASED(io_mcck_pending) + jo io_mcck_pending tm __TI_flags+3(%r12),_TIF_NEED_RESCHED - bo BASED(io_reschedule) + jo io_reschedule tm __TI_flags+3(%r12),_TIF_SIGPENDING - bo BASED(io_sigpending) + jo io_sigpending tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME - bo BASED(io_notify_resume) - b BASED(io_return) # beware of critical section cleanup + jo io_notify_resume + j io_return # beware of critical section cleanup # # _TIF_MCCK_PENDING is set, call handler # io_mcck_pending: # TRACE_IRQS_ON already done at io_return - l %r1,BASED(.Ls390_handle_mcck) + l %r1,BASED(.Lhandle_mcck) basr %r14,%r1 # TIF bit will be cleared by handler TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_NEED_RESCHED is set, call schedule @@ -643,37 +553,37 @@ io_mcck_pending: io_reschedule: # TRACE_IRQS_ON already done at io_return l %r1,BASED(.Lschedule) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts basr %r14,%r1 # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_SIGPENDING is set, call do_signal # io_sigpending: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return # # _TIF_SIGPENDING is set, call do_signal # io_notify_resume: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_notify_resume) - basr %r14,%r1 # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_return) + j io_return /* * External interrupt handler routine @@ -682,23 +592,25 @@ io_notify_resume: ENTRY(ext_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_SAVE_AREA+16 - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(ext_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER -ext_no_vtime: + stm %r8,%r15,__LC_SAVE_AREA_ASYNC + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_EXT_OLD_PSW + tmh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,%r15,__LC_ASYNC_ENTER_TIMER +ext_skip: + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC + stm %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area + lr %r2,%r11 # pass pointer to pt_regs l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) - basr %r14,%r1 - b BASED(io_return) + basr %r14,%r1 # call do_extint + j io_return __critical_end: @@ -710,82 +622,74 @@ ENTRY(mcck_int_handler) stck __LC_MCCK_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+32 - la %r12,__LC_MCK_OLD_PSW + l %r12,__LC_THREAD_INFO + l %r13,__LC_SVC_NEW_PSW+4 + lm %r8,%r9,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? - bo BASED(mcck_int_main) # yes -> rest of mcck code invalid - mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA + jo mcck_panic # yes -> rest of mcck code invalid + la %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - bo BASED(1f) + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER - bl BASED(0f) + jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - bl BASED(0f) + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - bl BASED(0f) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) +2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - bno BASED(mcck_int_main) # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - bnz BASED(mcck_int_main) # from user -> load async stack - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end) - bhe BASED(mcck_int_main) - clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start) - bl BASED(mcck_int_main) - l %r14,BASED(.Lcleanup_critical) - basr %r14,%r14 -mcck_int_main: - l %r14,__LC_PANIC_STACK # are we already on the panic stack? - slr %r14,%r15 - sra %r14,PAGE_SHIFT - be BASED(0f) - l %r15,__LC_PANIC_STACK # load panic stack -0: s %r15,BASED(.Lc_spsize) # make room for registers & psw - CREATE_STACK_FRAME __LC_SAVE_AREA+32 - mvc SP_PSW(8,%r15),0(%r12) - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - bno BASED(mcck_no_vtime) # no -> skip cleanup critical - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - bz BASED(mcck_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER -mcck_no_vtime: - la %r2,SP_PTREGS(%r15) # load pt_regs - l %r1,BASED(.Ls390_mcck) - basr %r14,%r1 # call machine check handler - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(mcck_return) +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER +mcck_skip: + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT + mvc __PT_R0(64,%r11),__LC_GPREGS_SAVE_AREA + stm %r8,%r9,__PT_PSW(%r11) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + l %r1,BASED(.Ldo_machine_check) + lr %r2,%r11 # pass pointer to pt_regs + basr %r14,%r1 # call s390_do_machine_check + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jno mcck_return l %r1,__LC_KERNEL_STACK # switch to kernel stack - s %r1,BASED(.Lc_spsize) - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + ahi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r15) lr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off tm __TI_flags+3(%r12),_TIF_MCCK_PENDING - bno BASED(mcck_return) + jno mcck_return TRACE_IRQS_OFF - l %r1,BASED(.Ls390_handle_mcck) - basr %r14,%r1 # call machine check handler + l %r1,BASED(.Lhandle_mcck) + basr %r14,%r1 # call s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW + mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? - bno BASED(0f) - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 + jno 0f + lm %r0,%r15,__PT_R0(%r11) stpt __LC_EXIT_TIMER - lpsw __LC_RETURN_MCCK_PSW # back to caller -0: lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 - lpsw __LC_RETURN_MCCK_PSW # back to caller + lpsw __LC_RETURN_MCCK_PSW +0: lm %r0,%r15,__PT_R0(%r11) + lpsw __LC_RETURN_MCCK_PSW - RESTORE_ALL __LC_RETURN_MCCK_PSW,0 +mcck_panic: + l %r14,__LC_PANIC_STACK + slr %r14,%r15 + sra %r14,PAGE_SHIFT + jz 0f + l %r15,__LC_PANIC_STACK +0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip /* * Restart interruption handler, kick starter for additional CPUs @@ -799,18 +703,18 @@ restart_base: stck __LC_LAST_UPDATE_CLOCK mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - l %r15,__LC_SAVE_AREA+60 # load ksp + l %r15,__LC_GPREGS_SAVE_AREA+60 # load ksp lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs lam %a0,%a15,__LC_AREGS_SAVE_AREA - lm %r6,%r15,__SF_GPRS(%r15) # load registers from clone + lm %r6,%r15,__SF_GPRS(%r15)# load registers from clone l %r1,__LC_THREAD_INFO mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off basr %r14,0 l %r14,restart_addr-.(%r14) - basr %r14,%r14 # branch to start_secondary + basr %r14,%r14 # call start_secondary restart_addr: .long start_secondary .align 8 @@ -835,19 +739,19 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - st %r15,__LC_SAVE_AREA+48(%r0) # save r15 + st %r15,__LC_SAVE_AREA_RESTART basr %r15,0 0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack l %r15,0(%r15) - ahi %r15,-SP_SIZE # make room for pt_regs - stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(4,%r15),__LC_SAVE_AREA+48(%r0)# store saved %r15 to stack - mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + ahi %r15,-__PT_SIZE # create pt_regs on stack + stm %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw + ahi %r15,-STACK_FRAME_OVERHEAD + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) basr %r14,0 1: l %r14,.Ldo_restart-1b(%r14) basr %r14,%r14 - basr %r14,0 # load disabled wait PSW if 2: lpsw restart_psw_crash-2b(%r14) # do_restart returns .align 4 @@ -869,215 +773,174 @@ restart_psw_crash: */ stack_overflow: l %r15,__LC_PANIC_STACK # change to panic stack - sl %r15,BASED(.Lc_spsize) - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - la %r1,__LC_SAVE_AREA - ch %r12,BASED(.L0x020) # old psw addr == __LC_SVC_OLD_PSW ? - be BASED(0f) - ch %r12,BASED(.L0x028) # old psw addr == __LC_PGM_OLD_PSW ? - be BASED(0f) - la %r1,__LC_SAVE_AREA+16 -0: mvc SP_R12(16,%r15),0(%r1) # move %r12-%r15 to stack - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear back chain - l %r1,BASED(1f) # branch to kernel_stack_overflow - la %r2,SP_PTREGS(%r15) # load pt_regs - br %r1 + ahi %r15,-__PT_SIZE # create pt_regs + stm %r0,%r7,__PT_R0(%r15) + stm %r8,%r9,__PT_PSW(%r15) + mvc __PT_R8(32,%r11),0(%r14) + lr %r15,%r11 + ahi %r15,-STACK_FRAME_OVERHEAD + l %r1,BASED(1f) + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + lr %r2,%r11 # pass pointer to pt_regs + br %r1 # branch to kernel_stack_overflow 1: .long kernel_stack_overflow #endif -cleanup_table_system_call: - .long system_call + 0x80000000, sysc_do_svc + 0x80000000 -cleanup_table_sysc_tif: - .long sysc_tif + 0x80000000, sysc_restore + 0x80000000 -cleanup_table_sysc_restore: - .long sysc_restore + 0x80000000, sysc_done + 0x80000000 -cleanup_table_io_tif: - .long io_tif + 0x80000000, io_restore + 0x80000000 -cleanup_table_io_restore: - .long io_restore + 0x80000000, io_done + 0x80000000 +cleanup_table: + .long system_call + 0x80000000 + .long sysc_do_svc + 0x80000000 + .long sysc_tif + 0x80000000 + .long sysc_restore + 0x80000000 + .long sysc_done + 0x80000000 + .long io_tif + 0x80000000 + .long io_restore + 0x80000000 + .long io_done + 0x80000000 cleanup_critical: - clc 4(4,%r12),BASED(cleanup_table_system_call) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_system_call+4) - bl BASED(cleanup_system_call) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_tif) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_tif+4) - bl BASED(cleanup_sysc_tif) -0: - clc 4(4,%r12),BASED(cleanup_table_sysc_restore) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_restore+4) - bl BASED(cleanup_sysc_restore) -0: - clc 4(4,%r12),BASED(cleanup_table_io_tif) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_tif+4) - bl BASED(cleanup_io_tif) -0: - clc 4(4,%r12),BASED(cleanup_table_io_restore) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_restore+4) - bl BASED(cleanup_io_restore) -0: - br %r14 + cl %r9,BASED(cleanup_table) # system_call + jl 0f + cl %r9,BASED(cleanup_table+4) # sysc_do_svc + jl cleanup_system_call + cl %r9,BASED(cleanup_table+8) # sysc_tif + jl 0f + cl %r9,BASED(cleanup_table+12) # sysc_restore + jl cleanup_sysc_tif + cl %r9,BASED(cleanup_table+16) # sysc_done + jl cleanup_sysc_restore + cl %r9,BASED(cleanup_table+20) # io_tif + jl 0f + cl %r9,BASED(cleanup_table+24) # io_restore + jl cleanup_io_tif + cl %r9,BASED(cleanup_table+28) # io_done + jl cleanup_io_restore +0: br %r14 cleanup_system_call: - mvc __LC_RETURN_PSW(8),0(%r12) - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4) - bh BASED(0f) - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) + # check if stpt has been executed + cl %r9,BASED(cleanup_system_call_insn) + jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: c %r12,BASED(.Lmck_old_psw) - la %r12,__LC_SAVE_AREA+32 - be BASED(0f) - la %r12,__LC_SAVE_AREA+16 -0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8) - bhe BASED(cleanup_vtime) - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn) - bh BASED(0f) - mvc __LC_SAVE_AREA(16),0(%r12) -0: st %r13,4(%r12) - l %r15,__LC_KERNEL_STACK # problem state -> load ksp - s %r15,BASED(.Lc_spsize) # make room for registers & psw - st %r15,12(%r12) - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc 0(4,%r12),__LC_THREAD_INFO - l %r12,__LC_THREAD_INFO - mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - oi __TI_flags+3(%r12),_TIF_SYSCALL -cleanup_vtime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) - bhe BASED(cleanup_stime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+16) - bh BASED(cleanup_update) - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + chi %r11,__LC_SAVE_AREA_ASYNC + je 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stm has been executed + cl %r9,BASED(cleanup_system_call_insn+4) + jh 0f + mvc __LC_SAVE_AREA_SYNC(32),0(%r11) +0: # set up saved registers r12, and r13 + st %r12,16(%r11) # r12 thread-info pointer + st %r13,20(%r11) # r13 literal-pool pointer + # check if the user time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+8) + jh 0f + l %r10,__LC_EXIT_TIMER + l %r15,__LC_EXIT_TIMER+4 + SUB64 %r10,%r15,__LC_SYNC_ENTER_TIMER + ADD64 %r10,%r15,__LC_USER_TIMER + st %r10,__LC_USER_TIMER + st %r15,__LC_USER_TIMER+4 +0: # check if the system time calculation has been done + cl %r9,BASED(cleanup_system_call_insn+12) + jh 0f + l %r10,__LC_LAST_UPDATE_TIMER + l %r15,__LC_LAST_UPDATE_TIMER+4 + SUB64 %r10,%r15,__LC_EXIT_TIMER + ADD64 %r10,%r15,__LC_SYSTEM_TIMER + st %r10,__LC_SYSTEM_TIMER + st %r15,__LC_SYSTEM_TIMER+4 +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4) - la %r12,__LC_RETURN_PSW + # set up saved register 11 + l %r15,__LC_KERNEL_STACK + ahi %r15,-__PT_SIZE + st %r15,12(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(32,%r15),__LC_SAVE_AREA_SYNC + stm %r0,%r7,__PT_R0(%r15) + mvc __PT_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + # setup saved register 15 + ahi %r15,-STACK_FRAME_OVERHEAD + st %r15,28(%r11) # r15 stack pointer + # set new psw address and exit + l %r9,BASED(cleanup_table+4) # sysc_do_svc + 0x80000000 br %r14 cleanup_system_call_insn: - .long sysc_saveall + 0x80000000 .long system_call + 0x80000000 - .long sysc_vtime + 0x80000000 - .long sysc_stime + 0x80000000 - .long sysc_update + 0x80000000 + .long sysc_stm + 0x80000000 + .long sysc_vtime + 0x80000000 + 36 + .long sysc_vtime + 0x80000000 + 76 cleanup_sysc_tif: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_tif) - la %r12,__LC_RETURN_PSW + l %r9,BASED(cleanup_table+8) # sysc_tif + 0x80000000 br %r14 cleanup_sysc_restore: - clc 4(4,%r12),BASED(cleanup_sysc_restore_insn) - be BASED(2f) - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: clc 4(4,%r12),BASED(cleanup_sysc_restore_insn+4) - be BASED(2f) - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - la %r12,__LC_SAVE_AREA+32 - be BASED(1f) - la %r12,__LC_SAVE_AREA+16 -1: mvc 0(16,%r12),SP_R12(%r15) - lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW + cl %r9,BASED(cleanup_sysc_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_sysc_restore_insn: .long sysc_done - 4 + 0x80000000 - .long sysc_done - 8 + 0x80000000 cleanup_io_tif: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_tif) - la %r12,__LC_RETURN_PSW + l %r9,BASED(cleanup_table+20) # io_tif + 0x80000000 br %r14 cleanup_io_restore: - clc 4(4,%r12),BASED(cleanup_io_restore_insn) - be BASED(1f) - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_io_restore_insn+4) - be BASED(1f) - mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - lm %r0,%r11,SP_R0(%r15) - l %r15,SP_R15(%r15) -1: la %r12,__LC_RETURN_PSW + cl %r9,BASED(cleanup_io_restore_insn) + jhe 0f + l %r9,12(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(8),__PT_PSW(%r9) + ni __LC_RETURN_PSW+1,0xfd # clear wait state bit + mvc 0(32,%r11),__PT_R8(%r9) + lm %r0,%r7,__PT_R0(%r9) +0: lm %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 - .long io_done - 8 + 0x80000000 /* * Integer constants */ - .align 4 -.Lc_spsize: .long SP_SIZE -.Lc_overhead: .long STACK_FRAME_OVERHEAD -.Lnr_syscalls: .long NR_syscalls -.L0x018: .short 0x018 -.L0x020: .short 0x020 -.L0x028: .short 0x028 -.L0x030: .short 0x030 -.L0x038: .short 0x038 -.Lc_1: .long 1 + .align 4 +.Lnr_syscalls: .long NR_syscalls /* * Symbol constants */ -.Ls390_mcck: .long s390_do_machine_check -.Ls390_handle_mcck: - .long s390_handle_mcck -.Lmck_old_psw: .long __LC_MCK_OLD_PSW -.Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint -.Ldo_signal: .long do_signal -.Ldo_notify_resume: - .long do_notify_resume -.Lhandle_per: .long do_per_trap -.Ldo_execve: .long do_execve -.Lexecve_tail: .long execve_tail -.Ljump_table: .long pgm_check_table -.Lschedule: .long schedule +.Ldo_machine_check: .long s390_do_machine_check +.Lhandle_mcck: .long s390_handle_mcck +.Ldo_IRQ: .long do_IRQ +.Ldo_extint: .long do_extint +.Ldo_signal: .long do_signal +.Ldo_notify_resume: .long do_notify_resume +.Ldo_per_trap: .long do_per_trap +.Ldo_execve: .long do_execve +.Lexecve_tail: .long execve_tail +.Ljump_table: .long pgm_check_table +.Lschedule: .long schedule #ifdef CONFIG_PREEMPT -.Lpreempt_schedule_irq: - .long preempt_schedule_irq +.Lpreempt_irq: .long preempt_schedule_irq #endif -.Ltrace_entry: .long do_syscall_trace_enter -.Ltrace_exit: .long do_syscall_trace_exit -.Lschedtail: .long schedule_tail -.Lsysc_table: .long sys_call_table +.Ltrace_enter: .long do_syscall_trace_enter +.Ltrace_exit: .long do_syscall_trace_exit +.Lschedule_tail: .long schedule_tail +.Lsys_call_table: .long sys_call_table +.Lsysc_per: .long sysc_per + 0x80000000 #ifdef CONFIG_TRACE_IRQFLAGS -.Ltrace_irq_on_caller: - .long trace_hardirqs_on_caller -.Ltrace_irq_off_caller: - .long trace_hardirqs_off_caller +.Lhardirqs_on: .long trace_hardirqs_on_caller +.Lhardirqs_off: .long trace_hardirqs_off_caller #endif #ifdef CONFIG_LOCKDEP -.Llockdep_sys_exit: - .long lockdep_sys_exit +.Llockdep_sys_exit: .long lockdep_sys_exit #endif -.Lcritical_start: - .long __critical_start + 0x80000000 -.Lcritical_end: - .long __critical_end + 0x80000000 -.Lcleanup_critical: - .long cleanup_critical +.Lcritical_start: .long __critical_start + 0x80000000 +.Lcritical_length: .long __critical_end - __critical_start .section .rodata, "a" #define SYSCALL(esa,esame,emu) .long esa diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index ef8fb1d6e8d..bf538aaf407 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -6,15 +6,15 @@ #include <asm/ptrace.h> -extern void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long); +extern void (*pgm_check_table[128])(struct pt_regs *); extern void *restart_stack; asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); -void do_protection_exception(struct pt_regs *, long, unsigned long); -void do_dat_exception(struct pt_regs *, long, unsigned long); -void do_asce_exception(struct pt_regs *, long, unsigned long); +void do_protection_exception(struct pt_regs *regs); +void do_dat_exception(struct pt_regs *regs); +void do_asce_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); @@ -28,7 +28,7 @@ void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); void do_restart(void); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); -void die(const char * str, struct pt_regs * regs, long err); +void die(struct pt_regs *regs, const char *str); void __init time_init(void); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 83a93747e2f..412a7b8783d 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -19,32 +19,22 @@ #include <asm/unistd.h> #include <asm/page.h> -/* - * Stack layout for the system_call stack entry. - * The first few entries are identical to the user_regs_struct. - */ -SP_PTREGS = STACK_FRAME_OVERHEAD -SP_ARGS = STACK_FRAME_OVERHEAD + __PT_ARGS -SP_PSW = STACK_FRAME_OVERHEAD + __PT_PSW -SP_R0 = STACK_FRAME_OVERHEAD + __PT_GPRS -SP_R1 = STACK_FRAME_OVERHEAD + __PT_GPRS + 8 -SP_R2 = STACK_FRAME_OVERHEAD + __PT_GPRS + 16 -SP_R3 = STACK_FRAME_OVERHEAD + __PT_GPRS + 24 -SP_R4 = STACK_FRAME_OVERHEAD + __PT_GPRS + 32 -SP_R5 = STACK_FRAME_OVERHEAD + __PT_GPRS + 40 -SP_R6 = STACK_FRAME_OVERHEAD + __PT_GPRS + 48 -SP_R7 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 -SP_R8 = STACK_FRAME_OVERHEAD + __PT_GPRS + 64 -SP_R9 = STACK_FRAME_OVERHEAD + __PT_GPRS + 72 -SP_R10 = STACK_FRAME_OVERHEAD + __PT_GPRS + 80 -SP_R11 = STACK_FRAME_OVERHEAD + __PT_GPRS + 88 -SP_R12 = STACK_FRAME_OVERHEAD + __PT_GPRS + 96 -SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 -SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 -SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 -SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE -SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE +__PT_R0 = __PT_GPRS +__PT_R1 = __PT_GPRS + 8 +__PT_R2 = __PT_GPRS + 16 +__PT_R3 = __PT_GPRS + 24 +__PT_R4 = __PT_GPRS + 32 +__PT_R5 = __PT_GPRS + 40 +__PT_R6 = __PT_GPRS + 48 +__PT_R7 = __PT_GPRS + 56 +__PT_R8 = __PT_GPRS + 64 +__PT_R9 = __PT_GPRS + 72 +__PT_R10 = __PT_GPRS + 80 +__PT_R11 = __PT_GPRS + 88 +__PT_R12 = __PT_GPRS + 96 +__PT_R13 = __PT_GPRS + 104 +__PT_R14 = __PT_GPRS + 112 +__PT_R15 = __PT_GPRS + 120 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -59,154 +49,103 @@ _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) - .macro SPP newpp -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP - jz .+8 - .insn s,0xb2800000,\newpp -#endif - .endm - - .macro HANDLE_SIE_INTERCEPT -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - tm __TI_flags+6(%r12),_TIF_SIE>>8 - jz 0f - SPP __LC_CMF_HPP # set host id - clc SP_PSW+8(8,%r15),BASED(.Lsie_loop) - jl 0f - clc SP_PSW+8(8,%r15),BASED(.Lsie_done) - jhe 0f - mvc SP_PSW+8(8,%r15),BASED(.Lsie_loop) -0: -#endif - .endm - -#ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 brasl %r14,trace_hardirqs_on_caller +#endif .endm .macro TRACE_IRQS_OFF +#ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 brasl %r14,trace_hardirqs_off_caller - .endm -#else -#define TRACE_IRQS_ON -#define TRACE_IRQS_OFF #endif + .endm -#ifdef CONFIG_LOCKDEP .macro LOCKDEP_SYS_EXIT - tm SP_PSW+1(%r15),0x01 # returning to user ? - jz 0f +#ifdef CONFIG_LOCKDEP + tm __PT_PSW+1(%r11),0x01 # returning to user ? + jz .+10 brasl %r14,lockdep_sys_exit -0: - .endm -#else -#define LOCKDEP_SYS_EXIT #endif - - .macro UPDATE_VTIME lc_from,lc_to,lc_sum - lg %r10,\lc_from - slg %r10,\lc_to - alg %r10,\lc_sum - stg %r10,\lc_sum .endm -/* - * Register usage in interrupt handlers: - * R9 - pointer to current task structure - * R13 - pointer to literal pool - * R14 - return register for function calls - * R15 - kernel stack pointer - */ + .macro SPP newpp +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP + jz .+8 + .insn s,0xb2800000,\newpp +#endif + .endm - .macro SAVE_ALL_SVC psworg,savearea - stmg %r11,%r15,\savearea - lg %r15,__LC_KERNEL_STACK # problem state -> load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - lg %r11,__LC_LAST_BREAK + .macro HANDLE_SIE_INTERCEPT scratch +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + tm __TI_flags+6(%r12),_TIF_SIE>>8 + jz .+42 + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP + jz .+8 + .insn s,0xb2800000,BASED(.Lhost_id) # set host id + lgr \scratch,%r9 + slg \scratch,BASED(.Lsie_loop) + clg \scratch,BASED(.Lsie_length) + jhe .+10 + lg %r9,BASED(.Lsie_loop) +#endif .endm - .macro SAVE_ALL_PGM psworg,savearea - stmg %r11,%r15,\savearea - tm \psworg+1,0x01 # test problem state bit + .macro CHECK_STACK stacksize,savearea #ifdef CONFIG_CHECK_STACK - jnz 1f - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jnz 2f - la %r12,\psworg - j stack_overflow -#else - jz 2f + tml %r15,\stacksize - CONFIG_STACK_GUARD + lghi %r14,\savearea + jz stack_overflow #endif -1: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp -2: aghi %r15,-SP_SIZE # make room for registers & psw - larl %r13,system_call - lg %r11,__LC_LAST_BREAK .endm - .macro SAVE_ALL_ASYNC psworg,savearea - stmg %r11,%r15,\savearea - larl %r13,system_call - lg %r11,__LC_LAST_BREAK - la %r12,\psworg - tm \psworg+1,0x01 # test problem state bit - jnz 1f # from user -> load kernel stack - clc \psworg+8(8),BASED(.Lcritical_end) + .macro SWITCH_ASYNC savearea,stack,shift + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f + lgr %r14,%r9 + slg %r14,BASED(.Lcritical_start) + clg %r14,BASED(.Lcritical_length) jhe 0f - clc \psworg+8(8),BASED(.Lcritical_start) - jl 0f + lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical - tm 1(%r12),0x01 # retest problem state after cleanup + tmhh %r8,0x0001 # retest problem state after cleanup jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the async. stack ? +0: lg %r14,\stack # are we already on the target stack? slgr %r14,%r15 - srag %r14,%r14,STACK_SHIFT -#ifdef CONFIG_CHECK_STACK + srag %r14,%r14,\shift jnz 1f - tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jnz 2f - j stack_overflow -#else - jz 2f -#endif -1: lg %r15,__LC_ASYNC_STACK # load async stack -2: aghi %r15,-SP_SIZE # make room for registers & psw - .endm - - .macro CREATE_STACK_FRAME savearea - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - mvc SP_R11(40,%r15),\savearea # move %r11-%r15 to stack - stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack + CHECK_STACK 1<<\shift,\savearea + j 2f +1: lg %r15,\stack # load target stack +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) .endm - .macro RESTORE_ALL psworg,sync - mvc \psworg(16),SP_PSW(%r15) # move user PSW to lowcore - .if !\sync - ni \psworg+1,0xfd # clear wait state bit - .endif - lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user - stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER - lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user - lpswe \psworg # back to caller + .macro UPDATE_VTIME scratch,enter_timer + lg \scratch,__LC_EXIT_TIMER + slg \scratch,\enter_timer + alg \scratch,__LC_USER_TIMER + stg \scratch,__LC_USER_TIMER + lg \scratch,__LC_LAST_UPDATE_TIMER + slg \scratch,__LC_EXIT_TIMER + alg \scratch,__LC_SYSTEM_TIMER + stg \scratch,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm - .macro LAST_BREAK - srag %r10,%r11,23 - jz 0f - stg %r11,__TI_last_break(%r12) -0: + .macro LAST_BREAK scratch + srag \scratch,%r10,23 + jz .+10 + stg %r10,__TI_last_break(%r12) .endm .macro REENABLE_IRQS - mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) - ni __SF_EMPTY(%r15),0xbf - ssm __SF_EMPTY(%r15) + stg %r8,__LC_RETURN_PSW + ni __LC_RETURN_PSW,0xbf + ssm __LC_RETURN_PSW .endm .section .kprobes.text, "ax" @@ -245,55 +184,66 @@ __critical_start: ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -sysc_saveall: - SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - oi __TI_flags+7(%r12),_TIF_SYSCALL +sysc_stmg: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call +sysc_per: + lg %r15,__LC_KERNEL_STACK + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs sysc_vtime: - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -sysc_stime: - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -sysc_update: - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK + UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r13 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC sysc_do_svc: - llgh %r7,SP_SVC_CODE+2(%r15) - slag %r7,%r7,2 # shift and test for svc 0 + oi __TI_flags+7(%r12),_TIF_SYSCALL + llgh %r8,__PT_INT_CODE+2(%r11) + slag %r8,%r8,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 - llgfr %r1,%r1 # clear high word in r1 + llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVC_CODE+2(%r15) - slag %r7,%r1,2 # shift and test for svc 0 + sth %r1,__PT_INT_CODE+2(%r11) + slag %r8,%r1,2 sysc_nr_ok: - larl %r10,sys_call_table + larl %r10,sys_call_table # 64 bit system call table #ifdef CONFIG_COMPAT - tm __TI_flags+5(%r12),(_TIF_31BIT>>16) # running in 31 bit mode ? + tm __TI_flags+5(%r12),(_TIF_31BIT>>16) jno sysc_noemu - larl %r10,sys_call_table_emu # use 31 bit emulation system calls + larl %r10,sys_call_table_emu # 31 bit system call table sysc_noemu: #endif + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stg %r2,__PT_ORIG_GPR2(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lgf %r9,0(%r8,%r10) # get system call add. tm __TI_flags+6(%r12),_TIF_TRACE >> 8 - mvc SP_ARGS(8,%r15),SP_R7(%r15) - lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys - basr %r14,%r8 # call sys_xxxx - stg %r2,SP_R2(%r15) # store return value (change R2 on stack) + basr %r14,%r9 # call sys_xxxx + stg %r2,__PT_R2(%r11) # store return value sysc_return: LOCKDEP_SYS_EXIT sysc_tif: - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jno sysc_restore tm __TI_flags+7(%r12),_TIF_WORK_SVC - jnz sysc_work # there is work to do (signals etc.) + jnz sysc_work # check for work ni __TI_flags+7(%r12),255-_TIF_SYSCALL sysc_restore: - RESTORE_ALL __LC_RETURN_PSW,1 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW sysc_done: # @@ -317,7 +267,7 @@ sysc_work: # sysc_reschedule: larl %r14,sysc_return - jg schedule # return point is sysc_return + jg schedule # # _TIF_MCCK_PENDING is set, call handler @@ -331,33 +281,33 @@ sysc_mcck_pending: # sysc_sigpending: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal tm __TI_flags+7(%r12),_TIF_SYSCALL jno sysc_return - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - lghi %r7,0 # svc 0 returns -ENOSYS - lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + lmg %r2,%r7,__PT_R2(%r11) # load svc arguments + lghi %r8,0 # svc 0 returns -ENOSYS + lh %r1,__PT_INT_CODE+2(%r11) # load new svc number cghi %r1,NR_syscalls jnl sysc_nr_ok # invalid svc number -> do svc 0 - slag %r7,%r1,2 + slag %r8,%r1,2 j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume # sysc_notify_resume: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs larl %r14,sysc_return - jg do_notify_resume # call do_notify_resume + jg do_notify_resume # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: ni __TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) - la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return # load adr. of system return + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return jg do_per_trap # @@ -365,41 +315,41 @@ sysc_singlestep: # and after the system call # sysc_tracesys: - la %r2,SP_PTREGS(%r15) # load pt_regs + lgr %r2,%r11 # pass pointer to pt_regs la %r3,0 - llgh %r0,SP_SVC_CODE+2(%r15) - stg %r0,SP_R2(%r15) + llgh %r0,__PT_INT_CODE+2(%r11) + stg %r0,__PT_R2(%r11) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls clgr %r0,%r2 jnh sysc_tracenogo - sllg %r7,%r2,2 # svc number *4 - lgf %r8,0(%r7,%r10) + sllg %r8,%r2,2 + lgf %r9,0(%r8,%r10) sysc_tracego: - lmg %r3,%r6,SP_R3(%r15) - mvc SP_ARGS(8,%r15),SP_R7(%r15) - lg %r2,SP_ORIG_R2(%r15) - basr %r14,%r8 # call sys_xxx - stg %r2,SP_R2(%r15) # store return value + lmg %r3,%r7,__PT_R3(%r11) + stg %r7,STACK_FRAME_OVERHEAD(%r15) + lg %r2,__PT_ORIG_GPR2(%r11) + basr %r14,%r9 # call sys_xxx + stg %r2,__PT_R2(%r11) # store return value sysc_tracenogo: tm __TI_flags+6(%r12),_TIF_TRACE >> 8 jz sysc_return - la %r2,SP_PTREGS(%r15) # load pt_regs - larl %r14,sysc_return # return point is sysc_return + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return jg do_syscall_trace_exit # # a new process exits the kernel with ret_from_fork # ENTRY(ret_from_fork) - lg %r13,__LC_SVC_NEW_PSW+8 - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? + la %r11,STACK_FRAME_OVERHEAD(%r15) + lg %r12,__LC_THREAD_INFO + tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? jo 0f - stg %r15,SP_R15(%r15) # store stack pointer for new kthread + stg %r15,__PT_R15(%r11) # store stack pointer for new kthread 0: brasl %r14,schedule_tail TRACE_IRQS_ON - stosm 24(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts j sysc_tracenogo # @@ -409,26 +359,26 @@ ENTRY(ret_from_fork) ENTRY(kernel_execve) stmg %r12,%r15,96(%r15) lgr %r14,%r15 - aghi %r15,-SP_SIZE + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) stg %r14,__SF_BACKCHAIN(%r15) - la %r12,SP_PTREGS(%r15) + la %r12,STACK_FRAME_OVERHEAD(%r15) xc 0(__PT_SIZE,%r12),0(%r12) lgr %r5,%r12 brasl %r14,do_execve ltgfr %r2,%r2 je 0f - aghi %r15,SP_SIZE + aghi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE) lmg %r12,%r15,96(%r15) br %r14 # execve succeeded. -0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts +0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts lg %r15,__LC_KERNEL_STACK # load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - lg %r13,__LC_SVC_NEW_PSW+8 - mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs lg %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts brasl %r14,execve_tail j sysc_return @@ -437,127 +387,72 @@ ENTRY(kernel_execve) */ ENTRY(pgm_check_handler) -/* - * First we need to check for a special case: - * Single stepping an instruction that disables the PER event mask will - * cause a PER event AFTER the mask has been set. Example: SVC or LPSW. - * For a single stepped SVC the program check handler gets control after - * the SVC new PSW has been loaded. But we want to execute the SVC first and - * then handle the PER event. Therefore we update the SVC old PSW to point - * to the pgm_check_handler and branch to the SVC handler after we checked - * if we have to load the kernel stack register. - * For every other possible cause for PER event without the PER mask set - * we just ignore the PER event (FIXME: is there anything we have to do - * for LPSW?). - */ stpt __LC_SYNC_ENTER_TIMER - tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception - jnz pgm_per # got per exception -> special case - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK -pgm_no_vtime: - stg %r11,SP_ARGS(%r15) - lgf %r3,__LC_PGM_ILC # load program interruption code - lg %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - lghi %r8,0x7f - ngr %r8,%r3 - sll %r8,3 - larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r1 # branch to interrupt-handler -pgm_exit: - j sysc_return - -# -# handle per exception -# -pgm_per: - tm __LC_PGM_OLD_PSW,0x40 # test if per event recording is on - jnz pgm_per_std # ok, normal per event from user space -# ok its one of the special cases, now we need to find out which one - clc __LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW - je pgm_svcper -# no interesting special case, ignore PER event - lpswe __LC_PGM_OLD_PSW - -# -# Normal per exception -# -pgm_per_std: - SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz pgm_no_vtime2 - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK -pgm_no_vtime2: + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_PGM_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + tmhh %r8,0x0001 # test problem state bit + jnz 1f # -> fault in user space + tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 0f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz pgm_svcper # -> single stepped svc +0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC + j 2f +1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER + LAST_BREAK %r14 + lg %r15,__LC_KERNEL_STACK +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC + mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE + stg %r10,__PT_ARGS(%r11) + tm __LC_PGM_ILC+3,0x80 # check for per exception + jz 0f lg %r1,__TI_task(%r12) - tm SP_PSW+1(%r15),0x01 # kernel per event ? - jz kernel_per - mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + tmhh %r8,0x0001 # kernel per event ? + jz pgm_kprobe + oi __TI_flags+7(%r12),_TIF_PER_TRAP mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP - lgf %r3,__LC_PGM_ILC # load program interruption code - lg %r4,__LC_TRANS_EXC_CODE - REENABLE_IRQS - lghi %r8,0x7f - ngr %r8,%r3 # clear per-event-bit and ilc - je pgm_exit2 - sll %r8,3 +0: REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table - lg %r1,0(%r8,%r1) # load address of handler routine - la %r2,SP_PTREGS(%r15) # address of register-save area + llgh %r10,__PT_INT_CODE+2(%r11) + nill %r10,0x007f + sll %r10,3 + je sysc_return + lg %r1,0(%r10,%r1) # load address of handler routine + lgr %r2,%r11 # pass pointer to pt_regs basr %r14,%r1 # branch to interrupt-handler -pgm_exit2: j sysc_return # -# it was a single stepped SVC that is causing all the trouble +# PER event in supervisor state, must be kprobes # -pgm_svcper: - SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SAVE_AREA - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - oi __TI_flags+7(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - LAST_BREAK - lg %r8,__TI_task(%r12) - mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE - mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS - mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - j sysc_do_svc +pgm_kprobe: + REENABLE_IRQS + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_per_trap + j sysc_return # -# per was called from kernel, must be kprobes +# single stepped system call # -kernel_per: - REENABLE_IRQS - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_per_trap - j pgm_exit +pgm_svcper: + oi __TI_flags+7(%r12),_TIF_PER_TRAP + mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + larl %r14,sysc_per + stg %r14,__LC_RETURN_PSW+8 + lpswe __LC_RETURN_PSW # branch to sysc_per and enable irqs /* * IO interrupt handler routine @@ -565,21 +460,25 @@ kernel_per: ENTRY(io_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+40 - CREATE_STACK_FRAME __LC_SAVE_AREA+40 - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz io_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER - LAST_BREAK -io_no_vtime: + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_IO_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user? + jz io_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +io_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF - la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_IRQ # call standard irq handler + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_IRQ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON @@ -587,7 +486,14 @@ io_tif: tm __TI_flags+7(%r12),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) io_restore: - RESTORE_ALL __LC_RETURN_PSW,0 + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + ni __LC_RETURN_PSW+1,0xfd # clear wait state bit + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_PSW io_done: # @@ -600,7 +506,7 @@ io_done: # Before any work can be done, a switch to the kernel stack is required. # io_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling @@ -609,10 +515,11 @@ io_work: tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jno io_restore # switch to kernel stack - lg %r1,SP_R15(%r15) - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lg %r1,__PT_R15(%r11) + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 # TRACE_IRQS_ON already done at io_return, call # TRACE_IRQS_OFF to keep things symmetrical @@ -628,9 +535,10 @@ io_work: # io_work_user: lg %r1,__LC_KERNEL_STACK - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 # @@ -663,9 +571,9 @@ io_mcck_pending: # io_reschedule: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts brasl %r14,schedule # call scheduler - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -674,10 +582,10 @@ io_reschedule: # io_sigpending: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_signal # call do_signal - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_signal + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -686,10 +594,10 @@ io_sigpending: # io_notify_resume: # TRACE_IRQS_ON already done at io_return - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - la %r2,SP_PTREGS(%r15) # load pt_regs - brasl %r14,do_notify_resume # call do_notify_resume - stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,do_notify_resume + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts TRACE_IRQS_OFF j io_return @@ -699,21 +607,24 @@ io_notify_resume: ENTRY(ext_int_handler) stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+40 - CREATE_STACK_FRAME __LC_SAVE_AREA+40 - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz ext_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER - LAST_BREAK -ext_no_vtime: + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO + larl %r13,system_call + lmg %r8,%r9,__LC_EXT_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT + tmhh %r8,0x0001 # interrupting from user ? + jz ext_skip + UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER + LAST_BREAK %r14 +ext_skip: + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + stmg %r8,%r9,__PT_PSW(%r11) TRACE_IRQS_OFF lghi %r1,4096 - la %r2,SP_PTREGS(%r15) # address of register-save area + lgr %r2,%r11 # pass pointer to pt_regs llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code llgf %r4,__LC_EXT_PARAMS # get external parameter lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter @@ -730,81 +641,77 @@ ENTRY(mcck_int_handler) la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs - stmg %r11,%r15,__LC_SAVE_AREA+80 + lg %r10,__LC_LAST_BREAK + lg %r12,__LC_THREAD_INFO larl %r13,system_call - lg %r11,__LC_LAST_BREAK - la %r12,__LC_MCK_OLD_PSW + lmg %r8,%r9,__LC_MCK_OLD_PSW + HANDLE_SIE_INTERCEPT %r14 tm __LC_MCCK_CODE,0x80 # system damage? - jo mcck_int_main # yes -> rest of mcck code invalid - la %r14,4095 - mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14) + jo mcck_panic # yes -> rest of mcck code invalid + lghi %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? - jo 1f + jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER jl 0f la %r14,__LC_ASYNC_ENTER_TIMER 0: clc 0(8,%r14),__LC_EXIT_TIMER - jl 0f + jl 1f la %r14,__LC_EXIT_TIMER -0: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER - jl 0f +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f la %r14,__LC_LAST_UPDATE_TIMER -0: spt 0(%r14) +2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? - jno mcck_int_main # no -> skip cleanup critical - tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit - jnz mcck_int_main # from user -> load kernel stack - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end) - jhe mcck_int_main - clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start) - jl mcck_int_main - brasl %r14,cleanup_critical -mcck_int_main: - lg %r14,__LC_PANIC_STACK # are we already on the panic stack? - slgr %r14,%r15 - srag %r14,%r14,PAGE_SHIFT - jz 0f - lg %r15,__LC_PANIC_STACK # load panic stack -0: aghi %r15,-SP_SIZE # make room for registers & psw - CREATE_STACK_FRAME __LC_SAVE_AREA+80 - mvc SP_PSW(16,%r15),0(%r12) - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct - tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? - jno mcck_no_vtime # no -> no timer update - HANDLE_SIE_INTERCEPT - tm SP_PSW+1(%r15),0x01 # interrupting from user ? - jz mcck_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER - LAST_BREAK -mcck_no_vtime: - la %r2,SP_PTREGS(%r15) # load pt_regs +3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_panic # no -> skip cleanup critical + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT + tm %r8,0x0001 # interrupting from user ? + jz mcck_skip + UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER + LAST_BREAK %r14 +mcck_skip: + lghi %r14,__LC_GPREGS_SAVE_AREA + mvc __PT_R0(128,%r11),0(%r14) + stmg %r8,%r9,__PT_PSW(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - tm SP_PSW+1(%r15),0x01 # returning to user ? + tm __PT_PSW+1(%r11),0x01 # returning to user ? jno mcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack - aghi %r1,-SP_SIZE - mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) - xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) + la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - stosm __SF_EMPTY(%r15),0x04 # turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jno mcck_return TRACE_IRQS_OFF brasl %r14,s390_handle_mcck TRACE_IRQS_ON mcck_return: - mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER -0: lpswe __LC_RETURN_MCCK_PSW # back to caller -mcck_done: + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER +0: lmg %r11,%r15,__PT_R11(%r11) + lpswe __LC_RETURN_MCCK_PSW + +mcck_panic: + lg %r14,__LC_PANIC_STACK + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j mcck_skip /* * Restart interruption handler, kick starter for additional CPUs @@ -818,17 +725,18 @@ restart_base: stck __LC_LAST_UPDATE_CLOCK mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) - lg %r15,__LC_SAVE_AREA+120 # load ksp + lghi %r10,__LC_GPREGS_SAVE_AREA + lg %r15,120(%r10) # load ksp lghi %r10,__LC_CREGS_SAVE_AREA - lctlg %c0,%c15,0(%r10) # get new ctl regs + lctlg %c0,%c15,0(%r10) # get new ctl regs lghi %r10,__LC_AREGS_SAVE_AREA lam %a0,%a15,0(%r10) - lmg %r6,%r15,__SF_GPRS(%r15) # load registers from clone + lmg %r6,%r15,__SF_GPRS(%r15)# load registers from clone lg %r1,__LC_THREAD_INFO mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER - stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on + ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off brasl %r14,start_secondary .align 8 restart_vtime: @@ -852,16 +760,16 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - stg %r15,__LC_SAVE_AREA+120(%r0) # save r15 + stg %r15,__LC_SAVE_AREA_RESTART larl %r15,restart_stack # load restart stack lg %r15,0(%r15) - aghi %r15,-SP_SIZE # make room for pt_regs - stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(8,%r15),__LC_SAVE_AREA+120(%r0)# store saved %r15 to stack - mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + aghi %r15,-__PT_SIZE # create pt_regs on stack + stmg %r0,%r14,__PT_R0(%r15) + mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw + aghi %r15,-STACK_FRAME_OVERHEAD + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) brasl %r14,do_restart - larl %r14,restart_psw_crash # load disabled wait PSW if lpswe 0(%r14) # do_restart returns .align 8 @@ -877,172 +785,153 @@ restart_psw_crash: * Setup a pt_regs so that show_trace can provide a good call trace. */ stack_overflow: - lg %r15,__LC_PANIC_STACK # change to panic stack - aghi %r15,-SP_SIZE - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack - la %r1,__LC_SAVE_AREA - chi %r12,__LC_SVC_OLD_PSW - je 0f - chi %r12,__LC_PGM_OLD_PSW - je 0f - la %r1,__LC_SAVE_AREA+40 -0: mvc SP_R11(40,%r15),0(%r1) # move %r11-%r15 to stack - mvc SP_ARGS(8,%r15),__LC_LAST_BREAK - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain - la %r2,SP_PTREGS(%r15) # load pt_regs + lg %r11,__LC_PANIC_STACK # change to panic stack + aghi %r11,-__PT_SIZE # create pt_regs + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + lgr %r15,%r11 + aghi %r15,-STACK_FRAME_OVERHEAD + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow #endif -cleanup_table_system_call: - .quad system_call, sysc_do_svc -cleanup_table_sysc_tif: - .quad sysc_tif, sysc_restore -cleanup_table_sysc_restore: - .quad sysc_restore, sysc_done -cleanup_table_io_tif: - .quad io_tif, io_restore -cleanup_table_io_restore: - .quad io_restore, io_done + .align 8 +cleanup_table: + .quad system_call + .quad sysc_do_svc + .quad sysc_tif + .quad sysc_restore + .quad sysc_done + .quad io_tif + .quad io_restore + .quad io_done cleanup_critical: - clc 8(8,%r12),BASED(cleanup_table_system_call) + clg %r9,BASED(cleanup_table) # system_call jl 0f - clc 8(8,%r12),BASED(cleanup_table_system_call+8) + clg %r9,BASED(cleanup_table+8) # sysc_do_svc jl cleanup_system_call -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_tif) + clg %r9,BASED(cleanup_table+16) # sysc_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_tif+8) + clg %r9,BASED(cleanup_table+24) # sysc_restore jl cleanup_sysc_tif -0: - clc 8(8,%r12),BASED(cleanup_table_sysc_restore) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_restore+8) + clg %r9,BASED(cleanup_table+32) # sysc_done jl cleanup_sysc_restore -0: - clc 8(8,%r12),BASED(cleanup_table_io_tif) + clg %r9,BASED(cleanup_table+40) # io_tif jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_tif+8) + clg %r9,BASED(cleanup_table+48) # io_restore jl cleanup_io_tif -0: - clc 8(8,%r12),BASED(cleanup_table_io_restore) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_restore+8) + clg %r9,BASED(cleanup_table+56) # io_done jl cleanup_io_restore -0: - br %r14 +0: br %r14 + cleanup_system_call: - mvc __LC_RETURN_PSW(16),0(%r12) - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8) + # check if stpt has been executed + clg %r9,BASED(cleanup_system_call_insn) jh 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER - cghi %r12,__LC_MCK_OLD_PSW - je 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+80 + cghi %r11,__LC_SAVE_AREA_ASYNC je 0f - la %r12,__LC_SAVE_AREA+40 -0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) - jhe cleanup_vtime - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER +0: # check if stmg has been executed + clg %r9,BASED(cleanup_system_call_insn+8) jh 0f - mvc __LC_SAVE_AREA(40),0(%r12) -0: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp - aghi %r15,-SP_SIZE # make room for registers & psw - stg %r15,32(%r12) - stg %r11,0(%r12) - CREATE_STACK_FRAME __LC_SAVE_AREA - mvc 8(8,%r12),__LC_THREAD_INFO - lg %r12,__LC_THREAD_INFO - mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - oi __TI_flags+7(%r12),_TIF_SYSCALL -cleanup_vtime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) - jhe cleanup_stime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER -cleanup_stime: - clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+32) - jh cleanup_update - UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER -cleanup_update: + mvc __LC_SAVE_AREA_SYNC(64),0(%r11) +0: # check if base register setup + TIF bit load has been done + clg %r9,BASED(cleanup_system_call_insn+16) + jhe 0f + # set up saved registers r10 and r12 + stg %r10,16(%r11) # r10 last break + stg %r12,32(%r11) # r12 thread-info pointer +0: # check if the user time update has been done + clg %r9,BASED(cleanup_system_call_insn+24) + jh 0f + lg %r15,__LC_EXIT_TIMER + slg %r15,__LC_SYNC_ENTER_TIMER + alg %r15,__LC_USER_TIMER + stg %r15,__LC_USER_TIMER +0: # check if the system time update has been done + clg %r9,BASED(cleanup_system_call_insn+32) + jh 0f + lg %r15,__LC_LAST_UPDATE_TIMER + slg %r15,__LC_EXIT_TIMER + alg %r15,__LC_SYSTEM_TIMER + stg %r15,__LC_SYSTEM_TIMER +0: # update accounting time stamp mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - srag %r12,%r11,23 - lg %r12,__LC_THREAD_INFO + # do LAST_BREAK + lg %r9,16(%r11) + srag %r9,%r9,23 jz 0f - stg %r11,__TI_last_break(%r12) -0: mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) - la %r12,__LC_RETURN_PSW + mvc __TI_last_break(8,%r12),16(%r11) +0: # set up saved register r11 + lg %r15,__LC_KERNEL_STACK + aghi %r15,-__PT_SIZE + stg %r15,24(%r11) # r11 pt_regs pointer + # fill pt_regs + mvc __PT_R8(64,%r15),__LC_SAVE_AREA_SYNC + stmg %r0,%r7,__PT_R0(%r15) + mvc __PT_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r15),__LC_SVC_ILC + # setup saved register r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r15,56(%r11) # r15 stack pointer + # set new psw address and exit + larl %r9,sysc_do_svc br %r14 cleanup_system_call_insn: - .quad sysc_saveall .quad system_call - .quad sysc_vtime - .quad sysc_stime - .quad sysc_update + .quad sysc_stmg + .quad sysc_per + .quad sysc_vtime+18 + .quad sysc_vtime+42 cleanup_sysc_tif: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_tif) - la %r12,__LC_RETURN_PSW + larl %r9,sysc_tif br %r14 cleanup_sysc_restore: - clc 8(8,%r12),BASED(cleanup_sysc_restore_insn) - je 2f - clc 8(8,%r12),BASED(cleanup_sysc_restore_insn+8) - jhe 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER - cghi %r12,__LC_MCK_OLD_PSW + clg %r9,BASED(cleanup_sysc_restore_insn) je 0f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER -0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+80 - je 1f - la %r12,__LC_SAVE_AREA+40 -1: mvc 0(40,%r12),SP_R11(%r15) - lmg %r0,%r10,SP_R0(%r15) - lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW + lg %r9,24(%r11) # get saved pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_sysc_restore_insn: .quad sysc_done - 4 - .quad sysc_done - 16 cleanup_io_tif: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_tif) - la %r12,__LC_RETURN_PSW + larl %r9,io_tif br %r14 cleanup_io_restore: - clc 8(8,%r12),BASED(cleanup_io_restore_insn) - je 1f - clc 8(8,%r12),BASED(cleanup_io_restore_insn+8) - jhe 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER -0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - mvc __LC_SAVE_AREA+80(40),SP_R11(%r15) - lmg %r0,%r10,SP_R0(%r15) - lg %r15,SP_R15(%r15) -1: la %r12,__LC_RETURN_PSW + clg %r9,BASED(cleanup_io_restore_insn) + je 0f + lg %r9,24(%r11) # get saved r11 pointer to pt_regs + mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) + ni __LC_RETURN_PSW+1,0xfd # clear wait state bit + mvc 0(64,%r11),__PT_R8(%r9) + lmg %r0,%r7,__PT_R0(%r9) +0: lmg %r8,%r9,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .quad io_done - 4 - .quad io_done - 16 /* * Integer constants */ - .align 4 + .align 8 .Lcritical_start: - .quad __critical_start -.Lcritical_end: - .quad __critical_end + .quad __critical_start +.Lcritical_length: + .quad __critical_end - __critical_start + #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) /* @@ -1054,6 +943,7 @@ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area + xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # host id == 0 lmg %r0,%r13,0(%r3) # load guest gprs 0-13 lg %r14,__LC_THREAD_INFO # pointer thread_info struct oi __TI_flags+6(%r14),_TIF_SIE>>8 @@ -1070,7 +960,7 @@ sie_gmap: SPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) sie_done: - SPP __LC_CMF_HPP # set host id + SPP __SF_EMPTY+16(%r15) # set host id lg %r14,__LC_THREAD_INFO # pointer thread_info struct sie_exit: lctlg %c1,%c1,__LC_USER_ASCE # load primary asce @@ -1093,8 +983,10 @@ sie_fault: .align 8 .Lsie_loop: .quad sie_loop -.Lsie_done: - .quad sie_done +.Lsie_length: + .quad sie_done - sie_loop +.Lhost_id: + .quad 0 .section __ex_table,"a" .quad sie_loop,sie_fault diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 900068d2bf9..c27a0727f93 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -329,8 +329,8 @@ iplstart: # # reset files in VM reader # - stidp __LC_SAVE_AREA # store cpuid - tm __LC_SAVE_AREA,0xff # running VM ? + stidp __LC_SAVE_AREA_SYNC # store cpuid + tm __LC_SAVE_AREA_SYNC,0xff# running VM ? bno .Lnoreset la %r2,.Lreset lhi %r3,26 diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3cd0f25ab01..47b168fb29c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -208,6 +208,7 @@ void machine_kexec_cleanup(struct kimage *image) void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); } diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 19b4568f4ce..22d502e885e 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -64,70 +64,82 @@ void detect_memory_layout(struct mem_chunk chunk[]) EXPORT_SYMBOL(detect_memory_layout); /* + * Move memory chunks array from index "from" to index "to" + */ +static void mem_chunk_move(struct mem_chunk chunk[], int to, int from) +{ + int cnt = MEMORY_CHUNKS - to; + + memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk)); +} + +/* + * Initialize memory chunk + */ +static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr, + unsigned long size, int type) +{ + chunk->type = type; + chunk->addr = addr; + chunk->size = size; +} + +/* * Create memory hole with given address, size, and type */ -void create_mem_hole(struct mem_chunk chunks[], unsigned long addr, +void create_mem_hole(struct mem_chunk chunk[], unsigned long addr, unsigned long size, int type) { - unsigned long start, end, new_size; - int i; + unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size; + int i, ch_type; for (i = 0; i < MEMORY_CHUNKS; i++) { - if (chunks[i].size == 0) - continue; - if (addr + size < chunks[i].addr) - continue; - if (addr >= chunks[i].addr + chunks[i].size) + if (chunk[i].size == 0) continue; - start = max(addr, chunks[i].addr); - end = min(addr + size, chunks[i].addr + chunks[i].size); - new_size = end - start; - if (new_size == 0) - continue; - if (start == chunks[i].addr && - end == chunks[i].addr + chunks[i].size) { - /* Remove chunk */ - chunks[i].type = type; - } else if (start == chunks[i].addr) { - /* Make chunk smaller at start */ - if (i >= MEMORY_CHUNKS - 1) - panic("Unable to create memory hole"); - memmove(&chunks[i + 1], &chunks[i], - sizeof(struct mem_chunk) * - (MEMORY_CHUNKS - (i + 1))); - chunks[i + 1].addr = chunks[i].addr + new_size; - chunks[i + 1].size = chunks[i].size - new_size; - chunks[i].size = new_size; - chunks[i].type = type; - i += 1; - } else if (end == chunks[i].addr + chunks[i].size) { - /* Make chunk smaller at end */ - if (i >= MEMORY_CHUNKS - 1) - panic("Unable to create memory hole"); - memmove(&chunks[i + 1], &chunks[i], - sizeof(struct mem_chunk) * - (MEMORY_CHUNKS - (i + 1))); - chunks[i + 1].addr = start; - chunks[i + 1].size = new_size; - chunks[i + 1].type = type; - chunks[i].size -= new_size; + + /* Define chunk properties */ + ch_start = chunk[i].addr; + ch_size = chunk[i].size; + ch_end = ch_start + ch_size - 1; + ch_type = chunk[i].type; + + /* Is memory chunk hit by memory hole? */ + if (addr + size <= ch_start) + continue; /* No: memory hole in front of chunk */ + if (addr > ch_end) + continue; /* No: memory hole after chunk */ + + /* Yes: Define local hole properties */ + lh_start = max(addr, chunk[i].addr); + lh_end = min(addr + size - 1, ch_end); + lh_size = lh_end - lh_start + 1; + + if (lh_start == ch_start && lh_end == ch_end) { + /* Hole covers complete memory chunk */ + mem_chunk_init(&chunk[i], lh_start, lh_size, type); + } else if (lh_end == ch_end) { + /* Hole starts in memory chunk and convers chunk end */ + mem_chunk_move(chunk, i + 1, i); + mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size, + ch_type); + mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); i += 1; + } else if (lh_start == ch_start) { + /* Hole ends in memory chunk */ + mem_chunk_move(chunk, i + 1, i); + mem_chunk_init(&chunk[i], lh_start, lh_size, type); + mem_chunk_init(&chunk[i + 1], lh_end + 1, + ch_size - lh_size, ch_type); + break; } else { - /* Create memory hole */ - if (i >= MEMORY_CHUNKS - 2) - panic("Unable to create memory hole"); - memmove(&chunks[i + 2], &chunks[i], - sizeof(struct mem_chunk) * - (MEMORY_CHUNKS - (i + 2))); - chunks[i + 1].addr = addr; - chunks[i + 1].size = size; - chunks[i + 1].type = type; - chunks[i + 2].addr = addr + size; - chunks[i + 2].size = - chunks[i].addr + chunks[i].size - (addr + size); - chunks[i + 2].type = chunks[i].type; - chunks[i].size = addr - chunks[i].addr; - i += 2; + /* Hole splits memory chunk */ + mem_chunk_move(chunk, i + 2, i); + mem_chunk_init(&chunk[i], ch_start, + lh_start - ch_start, ch_type); + mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); + mem_chunk_init(&chunk[i + 2], lh_end + 1, + ch_end - lh_end, ch_type); + break; } } } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fab88431a06..0fd2e863e11 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -30,7 +30,7 @@ struct mcck_struct { static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); -static NORET_TYPE void s390_handle_damage(char *msg) +static void s390_handle_damage(char *msg) { smp_send_stop(); disabled_wait((unsigned long) __builtin_return_address(0)); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9451b210a1b..3201ae44799 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,10 +91,12 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) default_idle(); - tick_nohz_restart_sched_tick(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 450931a45b6..573bc29551e 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -296,13 +296,6 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; - if (addr == (addr_t) &dummy->regs.psw.addr) - /* - * The debugger changed the instruction address, - * reset system call restart, see signal.c:do_signal - */ - task_thread_info(child)->system_call = 0; - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -614,11 +607,6 @@ static int __poke_user_compat(struct task_struct *child, /* Transfer 31 bit amode bit to psw mask. */ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); - /* - * The debugger changed the instruction address, - * reset system call restart, see signal.c:do_signal - */ - task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; @@ -905,6 +893,14 @@ static int s390_last_break_get(struct task_struct *target, return 0; } +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -951,6 +947,7 @@ static const struct user_regset s390_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_last_break_get, + .set = s390_last_break_set, }, #endif [REGSET_SYSTEM_CALL] = { @@ -1116,6 +1113,14 @@ static int s390_compat_last_break_get(struct task_struct *target, return 0; } +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + static const struct user_regset s390_compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -1139,6 +1144,7 @@ static const struct user_regset s390_compat_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, }, [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index 732a793ec53..36b32658fb2 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -17,11 +17,11 @@ # ENTRY(store_status) /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA+120(%r0) + stg %r1,__LC_SAVE_AREA_RESTART lghi %r1,SAVE_AREA_BASE /* General purpose registers */ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA+120(%r0) + lg %r2,__LC_SAVE_AREA_RESTART stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) /* Control registers */ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8ac6bfa2786..354de0763ef 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> @@ -94,6 +95,15 @@ struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; int __initdata memory_end_set; unsigned long __initdata memory_end; +unsigned long VMALLOC_START; +EXPORT_SYMBOL(VMALLOC_START); + +unsigned long VMALLOC_END; +EXPORT_SYMBOL(VMALLOC_END); + +struct page *vmemmap; +EXPORT_SYMBOL(vmemmap); + /* An array with a pointer to the lowcore of every CPU. */ struct _lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); @@ -211,6 +221,8 @@ static void __init setup_zfcpdump(unsigned int console_devno) if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; + if (OLDMEM_BASE) + return; if (console_devno != -1) sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", ipl_info.data.fcp.dev_id.devno, console_devno); @@ -275,6 +287,15 @@ static int __init early_parse_mem(char *p) } early_param("mem", early_parse_mem); +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; + VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; + return 0; +} +early_param("vmalloc", parse_vmalloc); + unsigned int user_mode = HOME_SPACE_MODE; EXPORT_SYMBOL_GPL(user_mode); @@ -380,7 +401,6 @@ setup_lowcore(void) __ctl_set_bit(14, 29); } #else - lc->cmf_hpp = -1ULL; lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif lc->sync_enter_timer = S390_lowcore.sync_enter_timer; @@ -476,23 +496,19 @@ EXPORT_SYMBOL_GPL(real_memory_size); static void __init setup_memory_end(void) { - unsigned long memory_size; - unsigned long max_mem; + unsigned long vmax, vmalloc_size, tmp; int i; #ifdef CONFIG_ZFCPDUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP) { + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) { memory_end = ZFCPDUMP_HSA_SIZE; memory_end_set = 1; } #endif - memory_size = 0; + real_memory_size = 0; memory_end &= PAGE_MASK; - max_mem = memory_end ? min(VMEM_MAX_PHYS, memory_end) : VMEM_MAX_PHYS; - memory_end = min(max_mem, memory_end); - /* * Make sure all chunks are MAX_ORDER aligned so we don't need the * extra checks that HOLES_IN_ZONE would require. @@ -512,23 +528,48 @@ static void __init setup_memory_end(void) chunk->addr = start; chunk->size = end - start; } + real_memory_size = max(real_memory_size, + chunk->addr + chunk->size); } + /* Choose kernel address space layout: 2, 3, or 4 levels. */ +#ifdef CONFIG_64BIT + vmalloc_size = VMALLOC_END ?: 128UL << 30; + tmp = (memory_end ?: real_memory_size) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; + if (tmp <= (1UL << 42)) + vmax = 1UL << 42; /* 3-level kernel page table */ + else + vmax = 1UL << 53; /* 4-level kernel page table */ +#else + vmalloc_size = VMALLOC_END ?: 96UL << 20; + vmax = 1UL << 31; /* 2-level kernel page table */ +#endif + /* vmalloc area is at the end of the kernel address space. */ + VMALLOC_END = vmax; + VMALLOC_START = vmax - vmalloc_size; + + /* Split remaining virtual space between 1:1 mapping & vmemmap array */ + tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + tmp = VMALLOC_START - tmp * sizeof(struct page); + tmp &= ~((vmax >> 11) - 1); /* align to page table level */ + tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); + vmemmap = (struct page *) tmp; + + /* Take care that memory_end is set and <= vmemmap */ + memory_end = min(memory_end ?: real_memory_size, tmp); + + /* Fixup memory chunk array to fit into 0..memory_end */ for (i = 0; i < MEMORY_CHUNKS; i++) { struct mem_chunk *chunk = &memory_chunk[i]; - real_memory_size = max(real_memory_size, - chunk->addr + chunk->size); - if (chunk->addr >= max_mem) { + if (chunk->addr >= memory_end) { memset(chunk, 0, sizeof(*chunk)); continue; } - if (chunk->addr + chunk->size > max_mem) - chunk->size = max_mem - chunk->addr; - memory_size = max(memory_size, chunk->addr + chunk->size); + if (chunk->addr + chunk->size > memory_end) + chunk->size = memory_end - chunk->addr; } - if (!memory_end) - memory_end = memory_size; } void *restart_stack __attribute__((__section__(".data"))); @@ -577,7 +618,7 @@ static unsigned long __init find_crash_base(unsigned long crash_size, *msg = "first memory chunk must be at least crashkernel size"; return 0; } - if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) + if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) return OLDMEM_BASE; for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { @@ -652,7 +693,6 @@ static int __init verify_crash_base(unsigned long crash_base, static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, int type) { - create_mem_hole(memory_chunk, addr, size, type); } @@ -818,7 +858,8 @@ setup_memory(void) end_chunk = min(end_chunk, end_pfn); if (start_chunk >= end_chunk) continue; - add_active_range(0, start_chunk, end_chunk); + memblock_add_node(PFN_PHYS(start_chunk), + PFN_PHYS(end_chunk - start_chunk), 0); pfn = max(start_chunk, start_pfn); for (; pfn < end_chunk; pfn++) page_set_storage_key(PFN_PHYS(pfn), diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 05a85bc14c9..a8ba840294f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -302,9 +302,13 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ - regs->gprs[4] = current->thread.trap_no; - regs->gprs[5] = current->thread.prot_addr; - regs->gprs[6] = task_thread_info(current)->last_break; + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = task_thread_info(current)->last_break; + } /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) @@ -434,13 +438,13 @@ void do_signal(struct pt_regs *regs) * call information. */ current_thread_info()->system_call = - test_thread_flag(TIF_SYSCALL) ? regs->svc_code : 0; + test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { /* Whee! Actually deliver the signal. */ if (current_thread_info()->system_call) { - regs->svc_code = current_thread_info()->system_call; + regs->int_code = current_thread_info()->system_call; /* Check for system call restarting. */ switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: @@ -457,12 +461,12 @@ void do_signal(struct pt_regs *regs) regs->gprs[2] = regs->orig_gpr2; regs->psw.addr = __rewind_psw(regs->psw, - regs->svc_code >> 16); + regs->int_code >> 16); break; } - /* No longer in a system call */ - clear_thread_flag(TIF_SYSCALL); } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); if ((is_compat_task() ? handle_signal32(signr, &ka, &info, oldset, regs) : @@ -486,12 +490,13 @@ void do_signal(struct pt_regs *regs) } /* No handlers present - check for system call restart */ + clear_thread_flag(TIF_SYSCALL); if (current_thread_info()->system_call) { - regs->svc_code = current_thread_info()->system_call; + regs->int_code = current_thread_info()->system_call; switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: /* Restart with sys_restart_syscall */ - regs->svc_code = __NR_restart_syscall; + regs->int_code = __NR_restart_syscall; /* fallthrough */ case -ERESTARTNOHAND: case -ERESTARTSYS: @@ -500,9 +505,6 @@ void do_signal(struct pt_regs *regs) regs->gprs[2] = regs->orig_gpr2; set_thread_flag(TIF_SYSCALL); break; - default: - clear_thread_flag(TIF_SYSCALL); - break; } } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3ea872890da..2398ce6b15a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -69,9 +69,7 @@ enum s390_cpu_state { }; DEFINE_MUTEX(smp_cpu_state_mutex); -int smp_cpu_polarization[NR_CPUS]; static int smp_cpu_state[NR_CPUS]; -static int cpu_management; static DEFINE_PER_CPU(struct cpu, cpu_devices); @@ -149,29 +147,59 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) sp -= sizeof(struct pt_regs); regs = (struct pt_regs *) sp; memcpy(®s->gprs, ¤t_lc->gpregs_save_area, sizeof(regs->gprs)); - regs->psw = lc->psw_save_area; + regs->psw = current_lc->psw_save_area; sp -= STACK_FRAME_OVERHEAD; sf = (struct stack_frame *) sp; - sf->back_chain = regs->gprs[15]; + sf->back_chain = 0; smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]); } +static void smp_stop_cpu(void) +{ + while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) + cpu_relax(); +} + void smp_send_stop(void) { - int cpu, rc; + cpumask_t cpumask; + int cpu; + u64 end; /* Disable all interrupts/machine checks */ __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); - /* stop all processors */ - for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) - continue; - do { - rc = sigp(cpu, sigp_stop); - } while (rc == sigp_busy); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + + if (oops_in_progress) { + /* + * Give the other cpus the opportunity to complete + * outstanding interrupts before stopping them. + */ + end = get_clock() + (1000000UL << 12); + for_each_cpu(cpu, &cpumask) { + set_bit(ec_stop_cpu, (unsigned long *) + &lowcore_ptr[cpu]->ext_call_fast); + while (sigp(cpu, sigp_emergency_signal) == sigp_busy && + get_clock() < end) + cpu_relax(); + } + while (get_clock() < end) { + for_each_cpu(cpu, &cpumask) + if (cpu_stopped(cpu)) + cpumask_clear_cpu(cpu, &cpumask); + if (cpumask_empty(&cpumask)) + break; + cpu_relax(); + } + } + /* stop all processors */ + for_each_cpu(cpu, &cpumask) { + while (sigp(cpu, sigp_stop) == sigp_busy) + cpu_relax(); while (!cpu_stopped(cpu)) cpu_relax(); } @@ -187,7 +215,7 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, { unsigned long bits; - if (ext_int_code == 0x1202) + if ((ext_int_code & 0xffff) == 0x1202) kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; else kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; @@ -196,6 +224,9 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, */ bits = xchg(&S390_lowcore.ext_call_fast, 0); + if (test_bit(ec_stop_cpu, &bits)) + smp_stop_cpu(); + if (test_bit(ec_schedule, &bits)) scheduler_ipi(); @@ -204,6 +235,7 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); + } /* @@ -369,7 +401,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) if (cpu_known(cpu_id)) continue; __cpu_logical_map[logical_cpu] = cpu_id; - smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); if (!cpu_stopped(logical_cpu)) continue; set_cpu_present(logical_cpu, true); @@ -403,7 +435,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail) if (cpu_known(cpu_id)) continue; __cpu_logical_map[logical_cpu] = cpu_id; - smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN); set_cpu_present(logical_cpu, true); if (cpu >= info->configured) smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; @@ -656,7 +688,7 @@ int __cpuinit __cpu_up(unsigned int cpu) - sizeof(struct stack_frame)); memset(sf, 0, sizeof(struct stack_frame)); sf->gprs[9] = (unsigned long) sf; - cpu_lowcore->save_area[15] = (unsigned long) sf; + cpu_lowcore->gpregs_save_area[15] = (unsigned long) sf; __ctl_store(cpu_lowcore->cregs_save_area, 0, 15); atomic_inc(&init_mm.context.attach_count); asm volatile( @@ -806,7 +838,7 @@ void __init smp_prepare_boot_cpu(void) S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; smp_cpu_state[0] = CPU_STATE_CONFIGURED; - smp_cpu_polarization[0] = POLARIZATION_UNKNWN; + cpu_set_polarization(0, POLARIZATION_UNKNOWN); } void __init smp_cpus_done(unsigned int max_cpus) @@ -831,8 +863,8 @@ int setup_profiling_timer(unsigned int multiplier) } #ifdef CONFIG_HOTPLUG_CPU -static ssize_t cpu_configure_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) { ssize_t count; @@ -842,8 +874,8 @@ static ssize_t cpu_configure_show(struct sys_device *dev, return count; } -static ssize_t cpu_configure_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int cpu = dev->id; @@ -868,7 +900,8 @@ static ssize_t cpu_configure_store(struct sys_device *dev, rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]); if (!rc) { smp_cpu_state[cpu] = CPU_STATE_STANDBY; - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); } } break; @@ -877,7 +910,8 @@ static ssize_t cpu_configure_store(struct sys_device *dev, rc = sclp_cpu_configure(__cpu_logical_map[cpu]); if (!rc) { smp_cpu_state[cpu] = CPU_STATE_CONFIGURED; - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + topology_expect_change(); } } break; @@ -889,52 +923,21 @@ out: put_online_cpus(); return rc ? rc : count; } -static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t cpu_polarization_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) -{ - int cpu = dev->id; - ssize_t count; - - mutex_lock(&smp_cpu_state_mutex); - switch (smp_cpu_polarization[cpu]) { - case POLARIZATION_HRZ: - count = sprintf(buf, "horizontal\n"); - break; - case POLARIZATION_VL: - count = sprintf(buf, "vertical:low\n"); - break; - case POLARIZATION_VM: - count = sprintf(buf, "vertical:medium\n"); - break; - case POLARIZATION_VH: - count = sprintf(buf, "vertical:high\n"); - break; - default: - count = sprintf(buf, "unknown\n"); - break; - } - mutex_unlock(&smp_cpu_state_mutex); - return count; -} -static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL); - -static ssize_t show_cpu_address(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]); } -static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL); - +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { #ifdef CONFIG_HOTPLUG_CPU - &attr_configure.attr, + &dev_attr_configure.attr, #endif - &attr_address.attr, - &attr_polarization.attr, + &dev_attr_address.attr, NULL, }; @@ -942,8 +945,8 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_capability(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_capability(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int capability; int rc; @@ -953,10 +956,10 @@ static ssize_t show_capability(struct sys_device *dev, return rc; return sprintf(buf, "%u\n", capability); } -static SYSDEV_ATTR(capability, 0444, show_capability, NULL); +static DEVICE_ATTR(capability, 0444, show_capability, NULL); -static ssize_t show_idle_count(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long idle_count; @@ -976,10 +979,10 @@ repeat: goto repeat; return sprintf(buf, "%llu\n", idle_count); } -static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); +static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); -static ssize_t show_idle_time(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle; unsigned long long now, idle_time, idle_enter; @@ -1001,12 +1004,12 @@ repeat: goto repeat; return sprintf(buf, "%llu\n", idle_time >> 12); } -static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); +static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); static struct attribute *cpu_online_attrs[] = { - &attr_capability.attr, - &attr_idle_count.attr, - &attr_idle_time_us.attr, + &dev_attr_capability.attr, + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, NULL, }; @@ -1019,7 +1022,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, { unsigned int cpu = (unsigned int)(long)hcpu; struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; struct s390_idle_data *idle; int err = 0; @@ -1045,7 +1048,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = { static int __devinit smp_add_present_cpu(int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; + struct device *s = &c->dev; int rc; c->hotpluggable = 1; @@ -1055,11 +1058,20 @@ static int __devinit smp_add_present_cpu(int cpu) rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); if (rc) goto out_cpu; - if (!cpu_online(cpu)) - goto out; - rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); - if (!rc) - return 0; + if (cpu_online(cpu)) { + rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group); + if (rc) + goto out_online; + } + rc = topology_cpu_init(c); + if (rc) + goto out_topology; + return 0; + +out_topology: + if (cpu_online(cpu)) + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); +out_online: sysfs_remove_group(&s->kobj, &cpu_common_attr_group); out_cpu: #ifdef CONFIG_HOTPLUG_CPU @@ -1098,8 +1110,8 @@ out: return rc; } -static ssize_t __ref rescan_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { @@ -1108,64 +1120,19 @@ static ssize_t __ref rescan_store(struct sysdev_class *class, rc = smp_rescan_cpus(); return rc ? rc : count; } -static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store); +static DEVICE_ATTR(rescan, 0200, NULL, rescan_store); #endif /* CONFIG_HOTPLUG_CPU */ -static ssize_t dispatching_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - ssize_t count; - - mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", cpu_management); - mutex_unlock(&smp_cpu_state_mutex); - return count; -} - -static ssize_t dispatching_store(struct sysdev_class *dev, - struct sysdev_class_attribute *attr, - const char *buf, - size_t count) -{ - int val, rc; - char delim; - - if (sscanf(buf, "%d %c", &val, &delim) != 1) - return -EINVAL; - if (val != 0 && val != 1) - return -EINVAL; - rc = 0; - get_online_cpus(); - mutex_lock(&smp_cpu_state_mutex); - if (cpu_management == val) - goto out; - rc = topology_set_cpu_management(val); - if (!rc) - cpu_management = val; -out: - mutex_unlock(&smp_cpu_state_mutex); - put_online_cpus(); - return rc ? rc : count; -} -static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show, - dispatching_store); - -static int __init topology_init(void) +static int __init s390_smp_init(void) { - int cpu; - int rc; + int cpu, rc; register_cpu_notifier(&smp_cpu_nb); - #ifdef CONFIG_HOTPLUG_CPU - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan); + rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; #endif - rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching); - if (rc) - return rc; for_each_present_cpu(cpu) { rc = smp_add_present_cpu(cpu); if (rc) @@ -1173,4 +1140,4 @@ static int __init topology_init(void) } return 0; } -subsys_initcall(topology_init); +subsys_initcall(s390_smp_init); diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 476081440df..78ea1948ff5 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -60,74 +60,22 @@ out: } /* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. + * sys_ipc() is the de-multiplexer for the SysV IPC calls. */ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, unsigned long, third, void __user *, ptr) { - struct ipc_kludge tmp; - int ret; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) third); - case SEMGET: - return sys_semget(first, (int)second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl(first, (int)second, third, fourth); - } - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - (size_t)second, third); - break; - case MSGRCV: - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, (struct ipc_kludge __user *) ptr, - sizeof (struct ipc_kludge))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, - (size_t)second, tmp.msgtyp, third); - case MSGGET: - return sys_msgget((key_t)first, (int)second); - case MSGCTL: - return sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, - (int)second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - break; - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget(first, (size_t)second, third); - case SHMCTL: - return sys_shmctl(first, (int)second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - - } - - return -EINVAL; + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. + */ + return sys_ipc(call, first, second, third, ptr, third); } #ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 73eb08c874f..bcab2f04ba5 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -348,3 +348,5 @@ SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index ebbfab3c6e5..fa02f443f5f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -27,7 +27,7 @@ #include <linux/cpu.h> #include <linux/stop_machine.h> #include <linux/time.h> -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/smp.h> @@ -1116,34 +1116,35 @@ out_unlock: /* * Sysfs interface functions */ -static struct sysdev_class etr_sysclass = { - .name = "etr", +static struct bus_type etr_subsys = { + .name = "etr", + .dev_name = "etr", }; -static struct sys_device etr_port0_dev = { +static struct device etr_port0_dev = { .id = 0, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; -static struct sys_device etr_port1_dev = { +static struct device etr_port1_dev = { .id = 1, - .cls = &etr_sysclass, + .bus = &etr_subsys, }; /* - * ETR class attributes + * ETR subsys attributes */ -static ssize_t etr_stepping_port_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t etr_stepping_port_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", etr_port0.esw.p); } -static SYSDEV_CLASS_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); +static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); -static ssize_t etr_stepping_mode_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t etr_stepping_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) { char *mode_str; @@ -1157,12 +1158,12 @@ static ssize_t etr_stepping_mode_show(struct sysdev_class *class, return sprintf(buf, "%s\n", mode_str); } -static SYSDEV_CLASS_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); +static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); /* * ETR port attributes */ -static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) +static inline struct etr_aib *etr_aib_from_dev(struct device *dev) { if (dev == &etr_port0_dev) return etr_port0_online ? &etr_port0 : NULL; @@ -1170,8 +1171,8 @@ static inline struct etr_aib *etr_aib_from_dev(struct sys_device *dev) return etr_port1_online ? &etr_port1 : NULL; } -static ssize_t etr_online_show(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_online_show(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int online; @@ -1180,8 +1181,8 @@ static ssize_t etr_online_show(struct sys_device *dev, return sprintf(buf, "%i\n", online); } -static ssize_t etr_online_store(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_online_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned int value; @@ -1218,20 +1219,20 @@ out: return count; } -static SYSDEV_ATTR(online, 0600, etr_online_show, etr_online_store); +static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); -static ssize_t etr_stepping_control_show(struct sys_device *dev, - struct sysdev_attribute *attr, +static ssize_t etr_stepping_control_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? etr_eacr.e0 : etr_eacr.e1); } -static SYSDEV_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); +static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); -static ssize_t etr_mode_code_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_mode_code_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!etr_port0_online && !etr_port1_online) /* Status word is not uptodate if both ports are offline. */ @@ -1240,10 +1241,10 @@ static ssize_t etr_mode_code_show(struct sys_device *dev, etr_port0.esw.psc0 : etr_port0.esw.psc1); } -static SYSDEV_ATTR(state_code, 0400, etr_mode_code_show, NULL); +static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); -static ssize_t etr_untuned_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_untuned_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1252,10 +1253,10 @@ static ssize_t etr_untuned_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.u); } -static SYSDEV_ATTR(untuned, 0400, etr_untuned_show, NULL); +static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); -static ssize_t etr_network_id_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_network_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1264,10 +1265,10 @@ static ssize_t etr_network_id_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.net_id); } -static SYSDEV_ATTR(network, 0400, etr_network_id_show, NULL); +static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); -static ssize_t etr_id_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1276,10 +1277,10 @@ static ssize_t etr_id_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.etr_id); } -static SYSDEV_ATTR(id, 0400, etr_id_show, NULL); +static DEVICE_ATTR(id, 0400, etr_id_show, NULL); -static ssize_t etr_port_number_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_port_number_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1288,10 +1289,10 @@ static ssize_t etr_port_number_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf1.etr_pn); } -static SYSDEV_ATTR(port, 0400, etr_port_number_show, NULL); +static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); -static ssize_t etr_coupled_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_coupled_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1300,10 +1301,10 @@ static ssize_t etr_coupled_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.c); } -static SYSDEV_ATTR(coupled, 0400, etr_coupled_show, NULL); +static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); -static ssize_t etr_local_time_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_local_time_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1312,10 +1313,10 @@ static ssize_t etr_local_time_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.blto); } -static SYSDEV_ATTR(local_time, 0400, etr_local_time_show, NULL); +static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); -static ssize_t etr_utc_offset_show(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t etr_utc_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct etr_aib *aib = etr_aib_from_dev(dev); @@ -1324,64 +1325,64 @@ static ssize_t etr_utc_offset_show(struct sys_device *dev, return sprintf(buf, "%i\n", aib->edf3.buo); } -static SYSDEV_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); +static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); -static struct sysdev_attribute *etr_port_attributes[] = { - &attr_online, - &attr_stepping_control, - &attr_state_code, - &attr_untuned, - &attr_network, - &attr_id, - &attr_port, - &attr_coupled, - &attr_local_time, - &attr_utc_offset, +static struct device_attribute *etr_port_attributes[] = { + &dev_attr_online, + &dev_attr_stepping_control, + &dev_attr_state_code, + &dev_attr_untuned, + &dev_attr_network, + &dev_attr_id, + &dev_attr_port, + &dev_attr_coupled, + &dev_attr_local_time, + &dev_attr_utc_offset, NULL }; -static int __init etr_register_port(struct sys_device *dev) +static int __init etr_register_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; int rc; - rc = sysdev_register(dev); + rc = device_register(dev); if (rc) goto out; for (attr = etr_port_attributes; *attr; attr++) { - rc = sysdev_create_file(dev, *attr); + rc = device_create_file(dev, *attr); if (rc) goto out_unreg; } return 0; out_unreg: for (; attr >= etr_port_attributes; attr--) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); out: return rc; } -static void __init etr_unregister_port(struct sys_device *dev) +static void __init etr_unregister_port(struct device *dev) { - struct sysdev_attribute **attr; + struct device_attribute **attr; for (attr = etr_port_attributes; *attr; attr++) - sysdev_remove_file(dev, *attr); - sysdev_unregister(dev); + device_remove_file(dev, *attr); + device_unregister(dev); } static int __init etr_init_sysfs(void) { int rc; - rc = sysdev_class_register(&etr_sysclass); + rc = subsys_system_register(&etr_subsys, NULL); if (rc) goto out; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_port); + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); if (rc) - goto out_unreg_class; - rc = sysdev_class_create_file(&etr_sysclass, &attr_stepping_mode); + goto out_unreg_subsys; + rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); if (rc) goto out_remove_stepping_port; rc = etr_register_port(&etr_port0_dev); @@ -1395,11 +1396,11 @@ static int __init etr_init_sysfs(void) out_remove_port0: etr_unregister_port(&etr_port0_dev); out_remove_stepping_mode: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_mode); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); out_remove_stepping_port: - sysdev_class_remove_file(&etr_sysclass, &attr_stepping_port); -out_unreg_class: - sysdev_class_unregister(&etr_sysclass); + device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); +out_unreg_subsys: + bus_unregister(&etr_subsys); out: return rc; } @@ -1599,14 +1600,15 @@ out_unlock: } /* - * STP class sysfs interface functions + * STP subsys sysfs interface functions */ -static struct sysdev_class stp_sysclass = { - .name = "stp", +static struct bus_type stp_subsys = { + .name = "stp", + .dev_name = "stp", }; -static ssize_t stp_ctn_id_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_ctn_id_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1615,10 +1617,10 @@ static ssize_t stp_ctn_id_show(struct sysdev_class *class, *(unsigned long long *) stp_info.ctnid); } -static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); +static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); -static ssize_t stp_ctn_type_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_ctn_type_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1626,10 +1628,10 @@ static ssize_t stp_ctn_type_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.ctn); } -static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); +static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); -static ssize_t stp_dst_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_dst_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x2000)) @@ -1637,10 +1639,10 @@ static ssize_t stp_dst_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); } -static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); +static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); -static ssize_t stp_leap_seconds_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_leap_seconds_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x8000)) @@ -1648,10 +1650,10 @@ static ssize_t stp_leap_seconds_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); } -static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); +static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); -static ssize_t stp_stratum_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_stratum_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1659,10 +1661,10 @@ static ssize_t stp_stratum_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); } -static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL); +static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); -static ssize_t stp_time_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_time_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x0800)) @@ -1670,10 +1672,10 @@ static ssize_t stp_time_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int) stp_info.tto); } -static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL); +static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); -static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_time_zone_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online || !(stp_info.vbits & 0x4000)) @@ -1681,11 +1683,11 @@ static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); } -static SYSDEV_CLASS_ATTR(time_zone_offset, 0400, +static DEVICE_ATTR(time_zone_offset, 0400, stp_time_zone_offset_show, NULL); -static ssize_t stp_timing_mode_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_timing_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1693,10 +1695,10 @@ static ssize_t stp_timing_mode_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.tmd); } -static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); +static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); -static ssize_t stp_timing_state_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_timing_state_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (!stp_online) @@ -1704,17 +1706,17 @@ static ssize_t stp_timing_state_show(struct sysdev_class *class, return sprintf(buf, "%i\n", stp_info.tst); } -static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL); +static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); -static ssize_t stp_online_show(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_online_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", stp_online); } -static ssize_t stp_online_store(struct sysdev_class *class, - struct sysdev_class_attribute *attr, +static ssize_t stp_online_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { unsigned int value; @@ -1736,47 +1738,47 @@ static ssize_t stp_online_store(struct sysdev_class *class, } /* - * Can't use SYSDEV_CLASS_ATTR because the attribute should be named - * stp/online but attr_online already exists in this file .. + * Can't use DEVICE_ATTR because the attribute should be named + * stp/online but dev_attr_online already exists in this file .. */ -static struct sysdev_class_attribute attr_stp_online = { +static struct device_attribute dev_attr_stp_online = { .attr = { .name = "online", .mode = 0600 }, .show = stp_online_show, .store = stp_online_store, }; -static struct sysdev_class_attribute *stp_attributes[] = { - &attr_ctn_id, - &attr_ctn_type, - &attr_dst_offset, - &attr_leap_seconds, - &attr_stp_online, - &attr_stratum, - &attr_time_offset, - &attr_time_zone_offset, - &attr_timing_mode, - &attr_timing_state, +static struct device_attribute *stp_attributes[] = { + &dev_attr_ctn_id, + &dev_attr_ctn_type, + &dev_attr_dst_offset, + &dev_attr_leap_seconds, + &dev_attr_stp_online, + &dev_attr_stratum, + &dev_attr_time_offset, + &dev_attr_time_zone_offset, + &dev_attr_timing_mode, + &dev_attr_timing_state, NULL }; static int __init stp_init_sysfs(void) { - struct sysdev_class_attribute **attr; + struct device_attribute **attr; int rc; - rc = sysdev_class_register(&stp_sysclass); + rc = subsys_system_register(&stp_subsys, NULL); if (rc) goto out; for (attr = stp_attributes; *attr; attr++) { - rc = sysdev_class_create_file(&stp_sysclass, *attr); + rc = device_create_file(stp_subsys.dev_root, *attr); if (rc) goto out_unreg; } return 0; out_unreg: for (; attr >= stp_attributes; attr--) - sysdev_class_remove_file(&stp_sysclass, *attr); - sysdev_class_unregister(&stp_sysclass); + device_remove_file(stp_subsys.dev_root, *attr); + bus_unregister(&stp_subsys); out: return rc; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 77b8942b9a1..7370a41948c 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,22 +1,22 @@ /* - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2011 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/device.h> +#include <linux/workqueue.h> #include <linux/bootmem.h> +#include <linux/cpuset.h> +#include <linux/device.h> +#include <linux/kernel.h> #include <linux/sched.h> -#include <linux/workqueue.h> +#include <linux/init.h> +#include <linux/delay.h> #include <linux/cpu.h> #include <linux/smp.h> -#include <linux/cpuset.h> -#include <asm/delay.h> +#include <linux/mm.h> #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) @@ -31,7 +31,6 @@ struct mask_info { static int topology_enabled = 1; static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; -static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); /* topology_lock protects the core linked list */ @@ -41,11 +40,12 @@ static struct mask_info core_info; cpumask_t cpu_core_map[NR_CPUS]; unsigned char cpu_core_id[NR_CPUS]; -#ifdef CONFIG_SCHED_BOOK static struct mask_info book_info; cpumask_t cpu_book_map[NR_CPUS]; unsigned char cpu_book_id[NR_CPUS]; -#endif + +/* smp_cpu_state_mutex must be held when accessing this array */ +int cpu_polarization[NR_CPUS]; static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { @@ -68,8 +68,10 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) return mask; } -static void add_cpus_to_mask(struct topology_cpu *tl_cpu, - struct mask_info *book, struct mask_info *core) +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *core, + int one_core_per_cpu) { unsigned int cpu; @@ -83,15 +85,19 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu, for_each_present_cpu(lcpu) { if (cpu_logical_map(lcpu) != rcpu) continue; -#ifdef CONFIG_SCHED_BOOK cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; -#endif cpumask_set_cpu(lcpu, &core->mask); - cpu_core_id[lcpu] = core->id; - smp_cpu_polarization[lcpu] = tl_cpu->pp; + if (one_core_per_cpu) { + cpu_core_id[lcpu] = rcpu; + core = core->next; + } else { + cpu_core_id[lcpu] = core->id; + } + cpu_set_polarization(lcpu, tl_cpu->pp); } } + return core; } static void clear_masks(void) @@ -103,13 +109,11 @@ static void clear_masks(void) cpumask_clear(&info->mask); info = info->next; } -#ifdef CONFIG_SCHED_BOOK info = &book_info; while (info) { cpumask_clear(&info->mask); info = info->next; } -#endif } static union topology_entry *next_tle(union topology_entry *tle) @@ -119,43 +123,75 @@ static union topology_entry *next_tle(union topology_entry *tle) return (union topology_entry *)((struct topology_container *)tle + 1); } -static void tl_to_cores(struct sysinfo_15_1_x *info) +static void __tl_to_cores_generic(struct sysinfo_15_1_x *info) { -#ifdef CONFIG_SCHED_BOOK - struct mask_info *book = &book_info; -#else - struct mask_info *book = NULL; -#endif struct mask_info *core = &core_info; + struct mask_info *book = &book_info; union topology_entry *tle, *end; - - spin_lock_irq(&topology_lock); - clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { -#ifdef CONFIG_SCHED_BOOK case 2: book = book->next; book->id = tle->container.id; break; -#endif case 1: core = core->next; core->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, core); + add_cpus_to_mask(&tle->cpu, book, core, 0); break; default: clear_masks(); - goto out; + return; } tle = next_tle(tle); } -out: +} + +static void __tl_to_cores_z10(struct sysinfo_15_1_x *info) +{ + struct mask_info *core = &core_info; + struct mask_info *book = &book_info; + union topology_entry *tle, *end; + + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + core = add_cpus_to_mask(&tle->cpu, book, core, 1); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void tl_to_cores(struct sysinfo_15_1_x *info) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + spin_lock_irq(&topology_lock); + clear_masks(); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + __tl_to_cores_z10(info); + break; + default: + __tl_to_cores_generic(info); + } spin_unlock_irq(&topology_lock); } @@ -165,7 +201,7 @@ static void topology_update_polarization_simple(void) mutex_lock(&smp_cpu_state_mutex); for_each_possible_cpu(cpu) - smp_cpu_polarization[cpu] = POLARIZATION_HRZ; + cpu_set_polarization(cpu, POLARIZATION_HRZ); mutex_unlock(&smp_cpu_state_mutex); } @@ -184,8 +220,7 @@ static int ptf(unsigned long fc) int topology_set_cpu_management(int fc) { - int cpu; - int rc; + int cpu, rc; if (!MACHINE_HAS_TOPOLOGY) return -EOPNOTSUPP; @@ -196,7 +231,7 @@ int topology_set_cpu_management(int fc) if (rc) return -EBUSY; for_each_possible_cpu(cpu) - smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; + cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); return rc; } @@ -208,29 +243,25 @@ static void update_cpu_core_map(void) spin_lock_irqsave(&topology_lock, flags); for_each_possible_cpu(cpu) { cpu_core_map[cpu] = cpu_group_map(&core_info, cpu); -#ifdef CONFIG_SCHED_BOOK cpu_book_map[cpu] = cpu_group_map(&book_info, cpu); -#endif } spin_unlock_irqrestore(&topology_lock, flags); } void store_topology(struct sysinfo_15_1_x *info) { -#ifdef CONFIG_SCHED_BOOK int rc; rc = stsi(info, 15, 1, 3); if (rc != -ENOSYS) return; -#endif stsi(info, 15, 1, 2); } int arch_update_cpu_topology(void) { struct sysinfo_15_1_x *info = tl_info; - struct sys_device *sysdev; + struct device *dev; int cpu; if (!MACHINE_HAS_TOPOLOGY) { @@ -242,8 +273,8 @@ int arch_update_cpu_topology(void) tl_to_cores(info); update_cpu_core_map(); for_each_online_cpu(cpu) { - sysdev = get_cpu_sysdev(cpu); - kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + dev = get_cpu_device(cpu); + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } return 1; } @@ -265,12 +296,30 @@ static void topology_timer_fn(unsigned long ignored) set_topology_timer(); } +static struct timer_list topology_timer = + TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0); + +static atomic_t topology_poll = ATOMIC_INIT(0); + static void set_topology_timer(void) { - topology_timer.function = topology_timer_fn; - topology_timer.data = 0; - topology_timer.expires = jiffies + 60 * HZ; - add_timer(&topology_timer); + if (atomic_add_unless(&topology_poll, -1, 0)) + mod_timer(&topology_timer, jiffies + HZ / 10); + else + mod_timer(&topology_timer, jiffies + HZ * 60); +} + +void topology_expect_change(void) +{ + if (!MACHINE_HAS_TOPOLOGY) + return; + /* This is racy, but it doesn't matter since it is just a heuristic. + * Worst case is that we poll in a higher frequency for a bit longer. + */ + if (atomic_read(&topology_poll) > 60) + return; + atomic_add(60, &topology_poll); + set_topology_timer(); } static int __init early_parse_topology(char *p) @@ -282,23 +331,6 @@ static int __init early_parse_topology(char *p) } early_param("topology", early_parse_topology); -static int __init init_topology_update(void) -{ - int rc; - - rc = 0; - if (!MACHINE_HAS_TOPOLOGY) { - topology_update_polarization_simple(); - goto out; - } - init_timer_deferrable(&topology_timer); - set_topology_timer(); -out: - update_cpu_core_map(); - return rc; -} -__initcall(init_topology_update); - static void __init alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, int offset) { @@ -326,10 +358,108 @@ void __init s390_init_cpu_topology(void) store_topology(info); pr_info("The CPU configuration topology of the machine is:"); for (i = 0; i < TOPOLOGY_NR_MAG; i++) - printk(" %d", info->mag[i]); - printk(" / %d\n", info->mnest); - alloc_masks(info, &core_info, 2); -#ifdef CONFIG_SCHED_BOOK - alloc_masks(info, &book_info, 3); -#endif + printk(KERN_CONT " %d", info->mag[i]); + printk(KERN_CONT " / %d\n", info->mnest); + alloc_masks(info, &core_info, 1); + alloc_masks(info, &book_info, 2); +} + +static int cpu_management; + +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", cpu_management); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + rc = 0; + get_online_cpus(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == val) + goto out; + rc = topology_set_cpu_management(val); + if (rc) + goto out; + cpu_management = val; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + put_online_cpus(); + return rc ? rc : count; +} +static DEVICE_ATTR(dispatching, 0644, dispatching_show, + dispatching_store); + +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + switch (cpu_read_polarization(cpu)) { + case POLARIZATION_HRZ: + count = sprintf(buf, "horizontal\n"); + break; + case POLARIZATION_VL: + count = sprintf(buf, "vertical:low\n"); + break; + case POLARIZATION_VM: + count = sprintf(buf, "vertical:medium\n"); + break; + case POLARIZATION_VH: + count = sprintf(buf, "vertical:high\n"); + break; + default: + count = sprintf(buf, "unknown\n"); + break; + } + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); + +static struct attribute *topology_cpu_attrs[] = { + &dev_attr_polarization.attr, + NULL, +}; + +static struct attribute_group topology_cpu_attr_group = { + .attrs = topology_cpu_attrs, +}; + +int topology_cpu_init(struct cpu *cpu) +{ + return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); +} + +static int __init topology_init(void) +{ + if (!MACHINE_HAS_TOPOLOGY) { + topology_update_polarization_simple(); + goto out; + } + set_topology_timer(); +out: + update_cpu_core_map(); + return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } +device_initcall(topology_init); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index a9807dd8627..5ce3750b181 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -43,9 +43,9 @@ #include <asm/debug.h> #include "entry.h" -void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long); +void (*pgm_check_table[128])(struct pt_regs *regs); -int show_unhandled_signals; +int show_unhandled_signals = 1; #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) @@ -234,7 +234,7 @@ void show_regs(struct pt_regs *regs) static DEFINE_SPINLOCK(die_lock); -void die(const char * str, struct pt_regs * regs, long err) +void die(struct pt_regs *regs, const char *str) { static int die_counter; @@ -243,7 +243,7 @@ void die(const char * str, struct pt_regs * regs, long err) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); + printk("%s: %04x [#%d] ", str, regs->int_code & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); #endif @@ -254,7 +254,7 @@ void die(const char * str, struct pt_regs * regs, long err) printk("DEBUG_PAGEALLOC"); #endif printk("\n"); - notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); show_regs(regs); bust_spinlocks(0); add_taint(TAINT_DIE); @@ -267,8 +267,7 @@ void die(const char * str, struct pt_regs * regs, long err) do_exit(SIGSEGV); } -static void inline report_user_fault(struct pt_regs *regs, long int_code, - int signr) +static inline void report_user_fault(struct pt_regs *regs, int signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -276,7 +275,7 @@ static void inline report_user_fault(struct pt_regs *regs, long int_code, return; if (!printk_ratelimit()) return; - printk("User process fault: interruption code 0x%lX ", int_code); + printk("User process fault: interruption code 0x%X ", regs->int_code); print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); printk("\n"); show_regs(regs); @@ -287,19 +286,28 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, - struct pt_regs *regs, siginfo_t *info) +static inline void __user *get_psw_address(struct pt_regs *regs) { - if (notify_die(DIE_TRAP, str, regs, pgm_int_code, - pgm_int_code, signr) == NOTIFY_STOP) + return (void __user *) + ((regs->psw.addr - (regs->int_code >> 16)) & PSW_ADDR_INSN); +} + +static void __kprobes do_trap(struct pt_regs *regs, + int si_signo, int si_code, char *str) +{ + siginfo_t info; + + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) return; if (regs->psw.mask & PSW_MASK_PSTATE) { - struct task_struct *tsk = current; - - tsk->thread.trap_no = pgm_int_code & 0xffff; - force_sig_info(signr, info, tsk); - report_user_fault(regs, pgm_int_code, signr); + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = get_psw_address(regs); + force_sig_info(si_signo, &info, current); + report_user_fault(regs, si_signo); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); @@ -311,18 +319,11 @@ static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); if (btt == BUG_TRAP_TYPE_WARN) return; - die(str, regs, pgm_int_code); + die(regs, str); } } } -static inline void __user *get_psw_address(struct pt_regs *regs, - long pgm_int_code) -{ - return (void __user *) - ((regs->psw.addr - (pgm_int_code >> 16)) & PSW_ADDR_INSN); -} - void __kprobes do_per_trap(struct pt_regs *regs) { siginfo_t info; @@ -339,26 +340,19 @@ void __kprobes do_per_trap(struct pt_regs *regs) force_sig_info(SIGTRAP, &info, current); } -static void default_trap_handler(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void default_trap_handler(struct pt_regs *regs) { if (regs->psw.mask & PSW_MASK_PSTATE) { - report_user_fault(regs, pgm_int_code, SIGSEGV); + report_user_fault(regs, SIGSEGV); do_exit(SIGSEGV); } else - die("Unknown program exception", regs, pgm_int_code); + die(regs, "Unknown program exception"); } #define DO_ERROR_INFO(name, signr, sicode, str) \ -static void name(struct pt_regs *regs, long pgm_int_code, \ - unsigned long trans_exc_code) \ +static void name(struct pt_regs *regs) \ { \ - siginfo_t info; \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = get_psw_address(regs, pgm_int_code); \ - do_trap(pgm_int_code, signr, str, regs, &info); \ + do_trap(regs, signr, sicode, str); \ } DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, @@ -388,42 +382,34 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, "translation exception") -static inline void do_fp_trap(struct pt_regs *regs, void __user *location, - int fpc, long pgm_int_code) +static inline void do_fp_trap(struct pt_regs *regs, int fpc) { - siginfo_t si; - - si.si_signo = SIGFPE; - si.si_errno = 0; - si.si_addr = location; - si.si_code = 0; + int si_code = 0; /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ if (fpc & 0x8000) /* invalid fp operation */ - si.si_code = FPE_FLTINV; + si_code = FPE_FLTINV; else if (fpc & 0x4000) /* div by 0 */ - si.si_code = FPE_FLTDIV; + si_code = FPE_FLTDIV; else if (fpc & 0x2000) /* overflow */ - si.si_code = FPE_FLTOVF; + si_code = FPE_FLTOVF; else if (fpc & 0x1000) /* underflow */ - si.si_code = FPE_FLTUND; + si_code = FPE_FLTUND; else if (fpc & 0x0800) /* inexact */ - si.si_code = FPE_FLTRES; + si_code = FPE_FLTRES; } - do_trap(pgm_int_code, SIGFPE, - "floating point exception", regs, &si); + do_trap(regs, SIGFPE, si_code, "floating point exception"); } -static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void __kprobes illegal_op(struct pt_regs *regs) { siginfo_t info; __u8 opcode[6]; __u16 __user *location; int signal = 0; - location = get_psw_address(regs, pgm_int_code); + location = get_psw_address(regs); if (regs->psw.mask & PSW_MASK_PSTATE) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) @@ -467,44 +453,31 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, * If we get an illegal op in kernel mode, send it through the * kprobes notifier. If kprobes doesn't pick it up, SIGILL */ - if (notify_die(DIE_BPT, "bpt", regs, pgm_int_code, + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } #ifdef CONFIG_MATHEMU if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal == SIGSEGV) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) location; - do_trap(pgm_int_code, signal, - "user address fault", regs, &info); - } else + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal == SIGSEGV) + do_trap(regs, signal, SEGV_MAPERR, "user address fault"); + else #endif - if (signal) { - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *) location; - do_trap(pgm_int_code, signal, - "illegal operation", regs, &info); - } + if (signal) + do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); } #ifdef CONFIG_MATHEMU -void specification_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void specification_exception(struct pt_regs *regs) { __u8 opcode[6]; __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_psw_address(regs, pgm_int_code); + location = (__u16 __user *) get_psw_address(regs); if (regs->psw.mask & PSW_MASK_PSTATE) { get_user(*((__u16 *) opcode), location); @@ -539,30 +512,21 @@ void specification_exception(struct pt_regs *regs, long pgm_int_code, signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(pgm_int_code, signal, - "specification exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "specification exception"); } #else DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); #endif -static void data_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void data_exception(struct pt_regs *regs) { __u16 __user *location; int signal = 0; - location = get_psw_address(regs, pgm_int_code); + location = get_psw_address(regs); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); @@ -627,32 +591,18 @@ static void data_exception(struct pt_regs *regs, long pgm_int_code, else signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, location, - current->thread.fp_regs.fpc, pgm_int_code); - else if (signal) { - siginfo_t info; - info.si_signo = signal; - info.si_errno = 0; - info.si_code = ILL_ILLOPN; - info.si_addr = location; - do_trap(pgm_int_code, signal, "data exception", regs, &info); - } + do_fp_trap(regs, current->thread.fp_regs.fpc); + else if (signal) + do_trap(regs, signal, ILL_ILLOPN, "data exception"); } -static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void space_switch_exception(struct pt_regs *regs) { - siginfo_t info; - /* Set user psw back to home space mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) regs->psw.mask |= PSW_ASC_HOME; /* Send SIGILL. */ - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_PRVOPC; - info.si_addr = get_psw_address(regs, pgm_int_code); - do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info); + do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } void __kprobes kernel_stack_overflow(struct pt_regs * regs) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 56fe6bc81fe..e4c79ebb40e 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -43,6 +43,8 @@ SECTIONS NOTES :text :note + .dummy : { *(.dummy) } :data + RODATA #ifdef CONFIG_SHARED_KERNEL diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 87cedd61be0..8943e82cd4d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -70,7 +70,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index c7c51898984..02434543eab 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -132,7 +132,6 @@ static int handle_stop(struct kvm_vcpu *vcpu) int rc = 0; vcpu->stat.exit_stop_request++; - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); spin_lock_bh(&vcpu->arch.local_int.lock); if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; @@ -149,6 +148,8 @@ static int handle_stop(struct kvm_vcpu *vcpu) } if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + atomic_set_mask(CPUSTAT_STOPPED, + &vcpu->arch.sie_block->cpuflags); vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); rc = -EOPNOTSUPP; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 87c16705b38..278ee009ce6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -252,6 +252,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); if (rc == -EFAULT) exception = 1; + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; case KVM_S390_PROGRAM_INT: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0bd3bea1e4c..d1c44573245 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -65,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, @@ -127,6 +128,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: + case KVM_CAP_SYNC_MMU: r = 1; break; default: @@ -270,10 +272,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); gmap_enable(vcpu->arch.gmap); + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->arch.guest_acrs); @@ -301,7 +305,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM); + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | + CPUSTAT_SM | + CPUSTAT_STOPPED); vcpu->arch.sie_block->ecb = 6; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) facilities; @@ -428,7 +434,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -501,7 +507,7 @@ rerun_vcpu: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 39162636108..d0263895992 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -336,6 +336,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu) u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0; u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0; struct vm_area_struct *vma; + unsigned long user_address; vcpu->stat.instruction_tprot++; @@ -349,9 +350,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; + /* we must resolve the address without holding the mmap semaphore. + * This is ok since the userspace hypervisor is not supposed to change + * the mapping while the guest queries the memory. Otherwise the guest + * might crash or get wrong info anyway. */ + user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); + down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, - (unsigned long) __guestaddr_to_user(vcpu, address1)); + vma = find_vma(current->mm, user_address); if (!vma) { up_read(¤t->mm->mmap_sem); return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index f815118835f..0a7941d74bc 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -31,9 +31,11 @@ #define SIGP_SET_PREFIX 0x0d #define SIGP_STORE_STATUS_ADDR 0x0e #define SIGP_SET_ARCH 0x12 +#define SIGP_SENSE_RUNNING 0x15 /* cpu status bits */ #define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_NOT_RUNNING 0x00000400UL #define SIGP_STAT_INCORRECT_STATE 0x00000200UL #define SIGP_STAT_INVALID_PARAMETER 0x00000100UL #define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL @@ -57,8 +59,8 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, spin_lock(&fi->lock); if (fi->local_int[cpu_addr] == NULL) rc = 3; /* not operational */ - else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_RUNNING) { + else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; rc = 1; /* status stored */ } else { @@ -251,7 +253,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, spin_lock_bh(&li->lock); /* cpu must be in stopped state */ - if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { rc = 1; /* incorrect state */ *reg &= SIGP_STAT_INCORRECT_STATE; kfree(inti); @@ -275,6 +277,38 @@ out_fi: return rc; } +static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, + unsigned long *reg) +{ + int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else { + if (atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + /* running */ + rc = 1; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_NOT_RUNNING; + rc = 0; + } + } + spin_unlock(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, + rc); + + return rc; +} + int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) { int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; @@ -331,6 +365,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, &vcpu->arch.guest_gprs[r1]); break; + case SIGP_SENSE_RUNNING: + vcpu->stat.instruction_sigp_sense_running++; + rc = __sigp_sense_running(vcpu, cpu_addr, + &vcpu->arch.guest_gprs[r1]); + break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; /* user space must know about restart */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1766def5bc3..354dd39073e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -125,8 +125,7 @@ static inline int user_space_fault(unsigned long trans_exc_code) return trans_exc_code != 3; } -static inline void report_user_fault(struct pt_regs *regs, long int_code, - int signr, unsigned long address) +static inline void report_user_fault(struct pt_regs *regs, long signr) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -134,10 +133,12 @@ static inline void report_user_fault(struct pt_regs *regs, long int_code, return; if (!printk_ratelimit()) return; - printk("User process fault: interruption code 0x%lX ", int_code); + printk(KERN_ALERT "User process fault: interruption code 0x%X ", + regs->int_code); print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); - printk("\n"); - printk("failing address: %lX\n", address); + printk(KERN_CONT "\n"); + printk(KERN_ALERT "failing address: %lX\n", + regs->int_parm_long & __FAIL_ADDR_MASK); show_regs(regs); } @@ -145,24 +146,18 @@ static inline void report_user_fault(struct pt_regs *regs, long int_code, * Send SIGSEGV to task. This is an external routine * to keep the stack usage of do_page_fault small. */ -static noinline void do_sigsegv(struct pt_regs *regs, long int_code, - int si_code, unsigned long trans_exc_code) +static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; - unsigned long address; - address = trans_exc_code & __FAIL_ADDR_MASK; - current->thread.prot_addr = address; - current->thread.trap_no = int_code; - report_user_fault(regs, int_code, SIGSEGV, address); + report_user_fault(regs, SIGSEGV); si.si_signo = SIGSEGV; si.si_code = si_code; - si.si_addr = (void __user *) address; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGSEGV, &si, current); } -static noinline void do_no_context(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; unsigned long address; @@ -178,55 +173,48 @@ static noinline void do_no_context(struct pt_regs *regs, long int_code, * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - address = trans_exc_code & __FAIL_ADDR_MASK; - if (!user_space_fault(trans_exc_code)) + address = regs->int_parm_long & __FAIL_ADDR_MASK; + if (!user_space_fault(regs->int_parm_long)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %p\n", (void *)address); else printk(KERN_ALERT "Unable to handle kernel paging request" " at virtual user address %p\n", (void *)address); - die("Oops", regs, int_code); + die(regs, "Oops"); do_exit(SIGKILL); } -static noinline void do_low_address(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_low_address(struct pt_regs *regs) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. */ - die ("Low-address protection", regs, int_code); + die (regs, "Low-address protection"); do_exit(SIGKILL); } - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); } -static noinline void do_sigbus(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) +static noinline void do_sigbus(struct pt_regs *regs) { struct task_struct *tsk = current; - unsigned long address; struct siginfo si; /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - address = trans_exc_code & __FAIL_ADDR_MASK; - tsk->thread.prot_addr = address; - tsk->thread.trap_no = int_code; si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRERR; - si.si_addr = (void __user *) address; + si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGBUS, &si, tsk); } -static noinline void do_fault_error(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code, int fault) +static noinline void do_fault_error(struct pt_regs *regs, int fault) { int si_code; @@ -238,24 +226,24 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, /* User mode accesses just cause a SIGSEGV */ si_code = (fault == VM_FAULT_BADMAP) ? SEGV_MAPERR : SEGV_ACCERR; - do_sigsegv(regs, int_code, si_code, trans_exc_code); + do_sigsegv(regs, si_code); return; } case VM_FAULT_BADCONTEXT: - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); break; default: /* fault & VM_FAULT_ERROR */ if (fault & VM_FAULT_OOM) { if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); else pagefault_out_of_memory(); } else if (fault & VM_FAULT_SIGBUS) { /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs); else - do_sigbus(regs, int_code, trans_exc_code); + do_sigbus(regs); } else BUG(); break; @@ -273,12 +261,12 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline int do_exception(struct pt_regs *regs, int access, - unsigned long trans_exc_code) +static inline int do_exception(struct pt_regs *regs, int access) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; + unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; @@ -288,6 +276,7 @@ static inline int do_exception(struct pt_regs *regs, int access, tsk = current; mm = tsk->mm; + trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that @@ -387,45 +376,46 @@ out: return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_protection_exception(struct pt_regs *regs) { + unsigned long trans_exc_code; int fault; + trans_exc_code = regs->int_parm_long; /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16); + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ if (unlikely(!(trans_exc_code & 4))) { - do_low_address(regs, pgm_int_code, trans_exc_code); + do_low_address(regs); return; } - fault = do_exception(regs, VM_WRITE, trans_exc_code); + fault = do_exception(regs, VM_WRITE); if (unlikely(fault)) - do_fault_error(regs, 4, trans_exc_code, fault); + do_fault_error(regs, fault); } -void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_dat_exception(struct pt_regs *regs) { int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; - fault = do_exception(regs, access, trans_exc_code); + fault = do_exception(regs, access); if (unlikely(fault)) - do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); + do_fault_error(regs, fault); } #ifdef CONFIG_64BIT -void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +void __kprobes do_asce_exception(struct pt_regs *regs) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long trans_exc_code; + trans_exc_code = regs->int_parm_long; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; @@ -440,12 +430,12 @@ void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_MASK_PSTATE) { - do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code); + do_sigsegv(regs, SEGV_MAPERR); return; } no_context: - do_no_context(regs, pgm_int_code, trans_exc_code); + do_no_context(regs); } #endif @@ -459,14 +449,15 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) __builtin_return_address(0); regs.psw.addr |= PSW_ADDR_AMODE; - uaddr &= PAGE_MASK; + regs.int_code = pgm_int_code; + regs.int_parm_long = (uaddr & PAGE_MASK) | 2; access = write ? VM_WRITE : VM_READ; - fault = do_exception(®s, access, uaddr | 2); + fault = do_exception(®s, access); if (unlikely(fault)) { if (fault & VM_FAULT_OOM) return -EFAULT; else if (fault & VM_FAULT_SIGBUS) - do_sigbus(®s, pgm_int_code, uaddr); + do_sigbus(®s); } return fault ? -EFAULT : 0; } @@ -509,7 +500,7 @@ int pfault_init(void) .reserved = __PF_RES_FIELD }; int rc; - if (!MACHINE_IS_VM || pfault_disable) + if (pfault_disable) return -1; asm volatile( " diag %1,%0,0x258\n" @@ -530,7 +521,7 @@ void pfault_fini(void) .refversn = 2, }; - if (!MACHINE_IS_VM || pfault_disable) + if (pfault_disable) return; asm volatile( " diag %0,0,0x258\n" @@ -587,8 +578,13 @@ static void pfault_interrupt(unsigned int ext_int_code, } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial - * interrupt doesn't put the task to sleep. */ - tsk->thread.pfault_wait = -1; + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; } put_task_struct(tsk); } else { @@ -638,8 +634,6 @@ static int __init pfault_irq_init(void) { int rc; - if (!MACHINE_IS_VM) - return 0; rc = register_external_interrupt(0x2603, pfault_interrupt); if (rc) goto out_extint; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index d4b9fb4d004..5d633019d8f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -93,18 +93,22 @@ static unsigned long setup_zero_pages(void) void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; - unsigned long pgd_type; + unsigned long pgd_type, asce_bits; init_mm.pgd = swapper_pg_dir; - S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; #ifdef CONFIG_64BIT - /* A three level page table (4TB) is enough for the kernel space. */ - S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; - pgd_type = _REGION3_ENTRY_EMPTY; + if (VMALLOC_END > (1UL << 42)) { + asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION2_ENTRY_EMPTY; + } else { + asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION3_ENTRY_EMPTY; + } #else - S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; + asce_bits = _ASCE_TABLE_LENGTH; pgd_type = _SEGMENT_ENTRY_EMPTY; #endif + S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 301c84d3b54..9a4d02f64f1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -33,17 +33,6 @@ #define FRAG_MASK 0x03 #endif -unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE; -EXPORT_SYMBOL(VMALLOC_START); - -static int __init parse_vmalloc(char *arg) -{ - if (!arg) - return -EINVAL; - VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK; - return 0; -} -early_param("vmalloc", parse_vmalloc); unsigned long *crst_table_alloc(struct mm_struct *mm) { @@ -267,7 +256,10 @@ static int gmap_alloc_table(struct gmap *gmap, struct page *page; unsigned long *new; + /* since we dont free the gmap table until gmap_free we can unlock */ + spin_unlock(&gmap->mm->page_table_lock); page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + spin_lock(&gmap->mm->page_table_lock); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index f43c0e4282a..9daee91e6c3 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -22,6 +22,7 @@ #include <asm/irq.h> #include "hwsampler.h" +#include "op_counter.h" #define MAX_NUM_SDB 511 #define MIN_NUM_SDB 1 @@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, if (sample_data_ptr->P == 1) { /* userspace sample */ unsigned int pid = sample_data_ptr->prim_asn; + if (!counter_config.user) + goto skip_sample; rcu_read_lock(); tsk = pid_task(find_vpid(pid), PIDTYPE_PID); if (tsk) @@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, rcu_read_unlock(); } else { /* kernelspace sample */ + if (!counter_config.kernel) + goto skip_sample; regs = task_pt_regs(current); } @@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, !sample_data_ptr->P, tsk); mutex_unlock(&hws_sem); - + skip_sample: sample_data_ptr++; } } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 6efc18b5e60..2297be406c6 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -2,10 +2,11 @@ * arch/s390/oprofile/init.c * * S390 Version - * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) * * @remark Copyright 2002-2011 OProfile authors */ @@ -14,6 +15,8 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/module.h> +#include <asm/processor.h> #include "../../../drivers/oprofile/oprof.h" @@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); #ifdef CONFIG_64BIT #include "hwsampler.h" +#include "op_counter.h" #define DEFAULT_INTERVAL 4127518 @@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval; static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; -static int hwsampler_file; +static int hwsampler_enabled; static int hwsampler_running; /* start_mutex must be held to change */ +static int hwsampler_available; static struct oprofile_operations timer_ops; +struct op_counter_config counter_config; + +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, +}; +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing timer to be returned " + "as cpu type\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); +MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" + "(report cpu_type \"timer\""); + static int oprofile_hwsampler_start(void) { int retval; - hwsampler_running = hwsampler_file; + hwsampler_running = hwsampler_enabled; if (!hwsampler_running) return timer_ops.start(); @@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void) return; } +/* + * File ops used for: + * /dev/oprofile/0/enabled + * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) + */ + static ssize_t hwsampler_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); + return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); } static ssize_t hwsampler_write(struct file *file, char const __user *buf, @@ -88,9 +123,12 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; + if (val != 0 && val != 1) + return -EINVAL; + if (oprofile_started) /* * save to do without locking as we set @@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf, */ return -EBUSY; - hwsampler_file = val; + hwsampler_enabled = val; return count; } @@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = { .write = hwsampler_write, }; +/* + * File ops used for: + * /dev/oprofile/0/count + * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) + * + * Make sure that the value is within the hardware range. + */ + +static ssize_t hw_interval_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, + count, offset); +} + +static ssize_t hw_interval_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val < oprofile_min_interval) + oprofile_hw_interval = oprofile_min_interval; + else if (val > oprofile_max_interval) + oprofile_hw_interval = oprofile_max_interval; + else + oprofile_hw_interval = val; + + return count; +} + +static const struct file_operations hw_interval_fops = { + .read = hw_interval_read, + .write = hw_interval_write, +}; + +/* + * File ops used for: + * /dev/oprofile/0/event + * Only a single event with number 0 is supported with this counter. + * + * /dev/oprofile/0/unit_mask + * This is a dummy file needed by the user space tools. + * No value other than 0 is accepted or returned. + */ + +static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(0, buf, count, offset); +} + +static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + if (val != 0) + return -EINVAL; + return count; +} + +static const struct file_operations zero_fops = { + .read = hwsampler_zero_read, + .write = hwsampler_zero_write, +}; + +/* /dev/oprofile/0/kernel file ops. */ + +static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.kernel, + buf, count, offset); +} + +static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.kernel = val; + + return count; +} + +static const struct file_operations kernel_fops = { + .read = hwsampler_kernel_read, + .write = hwsampler_kernel_write, +}; + +/* /dev/oprofile/0/user file ops. */ + +static ssize_t hwsampler_user_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(counter_config.user, + buf, count, offset); +} + +static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + counter_config.user = val; + + return count; +} + +static const struct file_operations user_fops = { + .read = hwsampler_user_read, + .write = hwsampler_user_write, +}; + + +/* + * File ops used for: /dev/oprofile/timer/enabled + * The value always has to be the inverted value of hwsampler_enabled. So + * no separate variable is created. That way we do not need locking. + */ + +static ssize_t timer_enabled_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); +} + +static ssize_t timer_enabled_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (val != 0 && val != 1) + return -EINVAL; + + /* Timer cannot be disabled without having hardware sampling. */ + if (val == 0 && !hwsampler_available) + return -EINVAL; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_enabled = !val; + + return count; +} + +static const struct file_operations timer_enabled_fops = { + .read = timer_enabled_read, + .write = timer_enabled_write, +}; + + static int oprofile_create_hwsampling_files(struct super_block *sb, - struct dentry *root) + struct dentry *root) { - struct dentry *hw_dir; + struct dentry *dir; + + dir = oprofilefs_mkdir(sb, root, "timer"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); + + if (!hwsampler_available) + return 0; /* reinitialize default values */ - hwsampler_file = 1; + hwsampler_enabled = 1; + counter_config.kernel = 1; + counter_config.user = 1; - hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); - if (!hw_dir) - return -EINVAL; + if (!force_cpu_type) { + /* + * Create the counter file system. A single virtual + * counter is created which can be used to + * enable/disable hardware sampling dynamically from + * user space. The user space will configure a single + * counter with a single event. The value of 'event' + * and 'unit_mask' are not evaluated by the kernel code + * and can only be set to 0. + */ + + dir = oprofilefs_mkdir(sb, root, "0"); + if (!dir) + return -EINVAL; - oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); - oprofilefs_create_ulong(sb, hw_dir, "hw_interval", - &oprofile_hw_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); + oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); + oprofilefs_create_file(sb, dir, "event", &zero_fops); + oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); + oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); + oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); + oprofilefs_create_file(sb, dir, "user", &user_fops); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } else { + /* + * Hardware sampling can be used but the cpu_type is + * forced to timer in order to deal with legacy user + * space tools. The /dev/oprofile/hwsampling fs is + * provided in that case. + */ + dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!dir) + return -EINVAL; + + oprofilefs_create_file(sb, dir, "hwsampler", + &hwsampler_fops); + oprofilefs_create_file(sb, dir, "hw_interval", + &hw_interval_fops); + oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + } return 0; } static int oprofile_hwsampler_init(struct oprofile_operations *ops) { + /* + * Initialize the timer mode infrastructure as well in order + * to be able to switch back dynamically. oprofile_timer_init + * is not supposed to fail. + */ + if (oprofile_timer_init(ops)) + BUG(); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + ops->create_files = oprofile_create_hwsampling_files; + + /* + * If the user space tools do not support newer cpu types, + * the force_cpu_type module parameter + * can be used to always return \"timer\" as cpu type. + */ + if (force_cpu_type != timer) { + struct cpuid id; + + get_cpu_id (&id); + + switch (id.machine) { + case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; + case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; + default: return -ENODEV; + } + } + if (hwsampler_setup()) return -ENODEV; /* - * create hwsampler files only if hwsampler_setup() succeeds. + * Query the range for the sampling interval from the + * hardware. */ oprofile_min_interval = hwsampler_query_min_interval(); if (oprofile_min_interval == 0) @@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) if (oprofile_hw_interval > oprofile_max_interval) oprofile_hw_interval = oprofile_max_interval; - if (oprofile_timer_init(ops)) - return -ENODEV; - - printk(KERN_INFO "oprofile: using hardware sampling\n"); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); + printk(KERN_INFO "oprofile: System z hardware sampling " + "facility found.\n"); ops->start = oprofile_hwsampler_start; ops->stop = oprofile_hwsampler_stop; - ops->create_files = oprofile_create_hwsampling_files; return 0; } static void oprofile_hwsampler_exit(void) { - oprofile_timer_exit(); hwsampler_shutdown(); } @@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->backtrace = s390_backtrace; #ifdef CONFIG_64BIT - return oprofile_hwsampler_init(ops); + + /* + * -ENODEV is not reported to the caller. The module itself + * will use the timer mode sampling as fallback and this is + * always available. + */ + hwsampler_available = oprofile_hwsampler_init(ops) == 0; + + return 0; #else return -ENODEV; #endif diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h new file mode 100644 index 00000000000..1a8d3ca0901 --- /dev/null +++ b/arch/s390/oprofile/op_counter.h @@ -0,0 +1,23 @@ +/** + * arch/s390/oprofile/op_counter.h + * + * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) + * + * @remark Copyright 2011 OProfile authors + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +struct op_counter_config { + /* `enabled' maps to the hwsampler_file variable. */ + /* `count' maps to the oprofile_hw_interval variable. */ + /* `event' and `unit_mask' are unused. */ + unsigned long kernel; + unsigned long user; +}; + +extern struct op_counter_config counter_config; + +#endif /* OP_COUNTER_H */ |