diff options
Diffstat (limited to 'arch/s390')
49 files changed, 1420 insertions, 1286 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e0b98e71ff4..ff19efdf6fe 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -1,13 +1,8 @@ -config SCHED_MC - def_bool y - depends on SMP - config MMU def_bool y config ZONE_DMA - def_bool y - depends on 64BIT + def_bool y if 64BIT config LOCKDEP_SUPPORT def_bool y @@ -25,12 +20,10 @@ config RWSEM_XCHGADD_ALGORITHM def_bool y config ARCH_HAS_ILOG2_U32 - bool - default n + def_bool n config ARCH_HAS_ILOG2_U64 - bool - default n + def_bool n config GENERIC_HWEIGHT def_bool y @@ -42,9 +35,7 @@ config GENERIC_CLOCKEVENTS def_bool y config GENERIC_BUG - bool - depends on BUG - default y + def_bool y if BUG config GENERIC_BUG_RELATIVE_POINTERS def_bool y @@ -59,13 +50,10 @@ config ARCH_DMA_ADDR_T_64BIT def_bool 64BIT config GENERIC_LOCKBREAK - bool - default y - depends on SMP && PREEMPT + def_bool y if SMP && PREEMPT config PGSTE - bool - default y if KVM + def_bool y if KVM config VIRT_CPU_ACCOUNTING def_bool y @@ -85,7 +73,6 @@ config S390 select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_GRAPH_TRACER select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_DEFAULT_NO_SPIN_MUTEXES select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES @@ -99,6 +86,7 @@ config S390 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO select HAVE_GET_USER_PAGES_FAST + select HAVE_ARCH_MUTEX_CPU_RELAX select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK @@ -129,8 +117,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE config SCHED_OMIT_FRAME_POINTER - bool - default y + def_bool y source "init/Kconfig" @@ -143,20 +130,21 @@ comment "Processor type and features" source "kernel/time/Kconfig" config 64BIT - bool "64 bit kernel" + def_bool y + prompt "64 bit kernel" help Select this option if you have an IBM z/Architecture machine and want to use the 64 bit addressing mode. config 32BIT - bool - default y if !64BIT + def_bool y if !64BIT config KTIME_SCALAR def_bool 32BIT config SMP - bool "Symmetric multi-processing support" + def_bool y + prompt "Symmetric multi-processing support" ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If @@ -188,10 +176,10 @@ config NR_CPUS approximately sixteen kilobytes to the kernel image. config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" + def_bool y + prompt "Support for hot-pluggable CPUs" depends on SMP select HOTPLUG - default n help Say Y here to be able to turn CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu/cpu#. @@ -207,14 +195,16 @@ config SCHED_MC increased overhead in some places. config SCHED_BOOK - bool "Book scheduler support" + def_bool y + prompt "Book scheduler support" depends on SMP && SCHED_MC help Book scheduler support improves the CPU scheduler's decision making when dealing with machines that have several books. config MATHEMU - bool "IEEE FPU emulation" + def_bool y + prompt "IEEE FPU emulation" depends on MARCH_G5 help This option is required for IEEE compliant floating point arithmetic @@ -222,7 +212,8 @@ config MATHEMU need this. config COMPAT - bool "Kernel support for 31 bit emulation" + def_bool y + prompt "Kernel support for 31 bit emulation" depends on 64BIT select COMPAT_BINFMT_ELF help @@ -232,16 +223,14 @@ config COMPAT executing 31 bit applications. It is safe to say "Y". config SYSVIPC_COMPAT - bool - depends on COMPAT && SYSVIPC - default y + def_bool y if COMPAT && SYSVIPC config AUDIT_ARCH - bool - default y + def_bool y config S390_EXEC_PROTECT - bool "Data execute protection" + def_bool y + prompt "Data execute protection" help This option allows to enable a buffer overflow protection for user space programs and it also selects the addressing mode option above. @@ -301,7 +290,8 @@ config MARCH_Z196 endchoice config PACK_STACK - bool "Pack kernel stack" + def_bool y + prompt "Pack kernel stack" help This option enables the compiler option -mkernel-backchain if it is available. If the option is available the compiler supports @@ -314,7 +304,8 @@ config PACK_STACK Say Y if you are unsure. config SMALL_STACK - bool "Use 8kb for kernel stack instead of 16kb" + def_bool n + prompt "Use 8kb for kernel stack instead of 16kb" depends on PACK_STACK && 64BIT && !LOCKDEP help If you say Y here and the compiler supports the -mkernel-backchain @@ -326,7 +317,8 @@ config SMALL_STACK Say N if you are unsure. config CHECK_STACK - bool "Detect kernel stack overflow" + def_bool y + prompt "Detect kernel stack overflow" help This option enables the compiler option -mstack-guard and -mstack-size if they are available. If the compiler supports them @@ -350,7 +342,8 @@ config STACK_GUARD 512 for 64 bit. config WARN_STACK - bool "Emit compiler warnings for function with broken stack usage" + def_bool n + prompt "Emit compiler warnings for function with broken stack usage" help This option enables the compiler options -mwarn-framesize and -mwarn-dynamicstack. If the compiler supports these options it @@ -385,24 +378,24 @@ config ARCH_SPARSEMEM_DEFAULT def_bool y config ARCH_SELECT_MEMORY_MODEL - def_bool y + def_bool y config ARCH_ENABLE_MEMORY_HOTPLUG - def_bool y - depends on SPARSEMEM + def_bool y if SPARSEMEM config ARCH_ENABLE_MEMORY_HOTREMOVE def_bool y config ARCH_HIBERNATION_POSSIBLE - def_bool y if 64BIT + def_bool y if 64BIT source "mm/Kconfig" comment "I/O subsystem configuration" config QDIO - tristate "QDIO support" + def_tristate y + prompt "QDIO support" ---help--- This driver provides the Queued Direct I/O base support for IBM System z. @@ -413,7 +406,8 @@ config QDIO If unsure, say Y. config CHSC_SCH - tristate "Support for CHSC subchannels" + def_tristate y + prompt "Support for CHSC subchannels" help This driver allows usage of CHSC subchannels. A CHSC subchannel is usually present on LPAR only. @@ -431,7 +425,8 @@ config CHSC_SCH comment "Misc" config IPL - bool "Builtin IPL record support" + def_bool y + prompt "Builtin IPL record support" help If you want to use the produced kernel to IPL directly from a device, you have to merge a bootsector specific to the device @@ -463,7 +458,8 @@ config FORCE_MAX_ZONEORDER default "9" config PFAULT - bool "Pseudo page fault support" + def_bool y + prompt "Pseudo page fault support" help Select this option, if you want to use PFAULT pseudo page fault handling under VM. If running native or in LPAR, this option @@ -475,7 +471,8 @@ config PFAULT this option. config SHARED_KERNEL - bool "VM shared kernel support" + def_bool y + prompt "VM shared kernel support" help Select this option, if you want to share the text segment of the Linux kernel between different VM guests. This reduces memory @@ -486,7 +483,8 @@ config SHARED_KERNEL doing and want to exploit this feature. config CMM - tristate "Cooperative memory management" + def_tristate n + prompt "Cooperative memory management" help Select this option, if you want to enable the kernel interface to reduce the memory size of the system. This is accomplished @@ -498,14 +496,16 @@ config CMM option. config CMM_IUCV - bool "IUCV special message interface to cooperative memory management" + def_bool y + prompt "IUCV special message interface to cooperative memory management" depends on CMM && (SMSGIUCV=y || CMM=SMSGIUCV) help Select this option to enable the special message interface to the cooperative memory management. config APPLDATA_BASE - bool "Linux - VM Monitor Stream, base infrastructure" + def_bool n + prompt "Linux - VM Monitor Stream, base infrastructure" depends on PROC_FS help This provides a kernel interface for creating and updating z/VM APPLDATA @@ -520,7 +520,8 @@ config APPLDATA_BASE The /proc entries can also be read from, showing the current settings. config APPLDATA_MEM - tristate "Monitor memory management statistics" + def_tristate m + prompt "Monitor memory management statistics" depends on APPLDATA_BASE && VM_EVENT_COUNTERS help This provides memory management related data to the Linux - VM Monitor @@ -536,7 +537,8 @@ config APPLDATA_MEM appldata_mem.o. config APPLDATA_OS - tristate "Monitor OS statistics" + def_tristate m + prompt "Monitor OS statistics" depends on APPLDATA_BASE help This provides OS related data to the Linux - VM Monitor Stream, like @@ -550,7 +552,8 @@ config APPLDATA_OS appldata_os.o. config APPLDATA_NET_SUM - tristate "Monitor overall network statistics" + def_tristate m + prompt "Monitor overall network statistics" depends on APPLDATA_BASE && NET help This provides network related data to the Linux - VM Monitor Stream, @@ -567,30 +570,32 @@ config APPLDATA_NET_SUM source kernel/Kconfig.hz config S390_HYPFS_FS - bool "s390 hypervisor file system support" + def_bool y + prompt "s390 hypervisor file system support" select SYS_HYPERVISOR - default y help This is a virtual file system intended to provide accounting information in an s390 hypervisor environment. config KEXEC - bool "kexec system call" + def_bool n + prompt "kexec system call" help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot but is independent of hardware/microcode support. config ZFCPDUMP - bool "zfcpdump support" + def_bool n + prompt "zfcpdump support" select SMP - default n help Select this option if you want to build an zfcpdump enabled kernel. Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. config S390_GUEST -bool "s390 guest support for KVM (EXPERIMENTAL)" + def_bool y + prompt "s390 guest support for KVM (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL select VIRTIO select VIRTIO_RING @@ -602,9 +607,9 @@ bool "s390 guest support for KVM (EXPERIMENTAL)" the default console. config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS - default y help This kernel feature is useful for number crunching applications that may need to compute untrusted bytecode during their diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 05221b13ffb..2b380df9560 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -1,8 +1,7 @@ menu "Kernel hacking" config TRACE_IRQFLAGS_SUPPORT - bool - default y + def_bool y source "lib/Kconfig.debug" @@ -19,7 +18,8 @@ config STRICT_DEVMEM If you are unsure, say Y. config DEBUG_STRICT_USER_COPY_CHECKS - bool "Strict user copy size checks" + def_bool n + prompt "Strict user copy size checks" ---help--- Enabling this option turns a certain set of sanity checks for user copy operations into compile time warnings. diff --git a/arch/s390/defconfig b/arch/s390/defconfig index e40ac6ee652..d79697157ac 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -2,16 +2,12 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y +CONFIG_RCU_TRACE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_CGROUPS=y -CONFIG_CGROUP_NS=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_COMPAT_BRK is not set +CONFIG_PERF_EVENTS=y CONFIG_SLAB=y CONFIG_KPROBES=y CONFIG_MODULES=y @@ -20,24 +16,12 @@ CONFIG_MODVERSIONS=y CONFIG_DEFAULT_DEADLINE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y -CONFIG_64BIT=y -CONFIG_SMP=y -CONFIG_NR_CPUS=32 -CONFIG_COMPAT=y -CONFIG_S390_EXEC_PROTECT=y -CONFIG_PACK_STACK=y -CONFIG_CHECK_STACK=y CONFIG_PREEMPT=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y -CONFIG_QDIO=y -CONFIG_CHSC_SCH=m -CONFIG_IPL=y CONFIG_BINFMT_MISC=m -CONFIG_PFAULT=y CONFIG_HZ_100=y CONFIG_KEXEC=y -CONFIG_S390_GUEST=y CONFIG_PM=y CONFIG_HIBERNATION=y CONFIG_PACKET=y @@ -46,16 +30,15 @@ CONFIG_NET_KEY=y CONFIG_AFIUCV=m CONFIG_INET=y CONFIG_IP_MULTICAST=y +# CONFIG_INET_LRO is not set CONFIG_IPV6=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NF_CONNTRACK=m -# CONFIG_NF_CT_PROTO_SCTP is not set +CONFIG_NET_SCTPPROBE=m +CONFIG_L2TP=m +CONFIG_L2TP_DEBUGFS=m +CONFIG_VLAN_8021Q=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=y CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_SFQ=m CONFIG_NET_SCH_TEQL=m @@ -69,28 +52,14 @@ CONFIG_NET_CLS_U32=m CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_ACT_NAT=m -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_VCAN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_XIP=y -CONFIG_BLK_DEV_XPRAM=m -CONFIG_DASD=y -CONFIG_DASD_PROFILE=y -CONFIG_DASD_ECKD=y -CONFIG_DASD_FBA=y -CONFIG_DASD_DIAG=y -CONFIG_DASD_EER=y -CONFIG_VIRTIO_BLK=m +CONFIG_VIRTIO_BLK=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y @@ -102,101 +71,92 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_ZFCP=y -CONFIG_SCSI_DH=m -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=y -CONFIG_DM_SNAPSHOT=y -CONFIG_DM_MIRROR=y -CONFIG_DM_ZERO=y -CONFIG_DM_MULTIPATH=m +CONFIG_ZFCP_DIF=y CONFIG_NETDEVICES=y CONFIG_DUMMY=m CONFIG_BONDING=m CONFIG_EQUALIZER=m CONFIG_TUN=m -CONFIG_VETH=m CONFIG_NET_ETHERNET=y -CONFIG_LCS=m -CONFIG_CTCM=m -CONFIG_QETH=y -CONFIG_QETH_L2=y -CONFIG_QETH_L3=y -CONFIG_VIRTIO_NET=m -CONFIG_HW_RANDOM_VIRTIO=m +CONFIG_VIRTIO_NET=y CONFIG_RAW_DRIVER=m -CONFIG_TN3270=y -CONFIG_TN3270_TTY=y -CONFIG_TN3270_FS=m -CONFIG_TN3270_CONSOLE=y -CONFIG_TN3215=y -CONFIG_TN3215_CONSOLE=y -CONFIG_SCLP_TTY=y -CONFIG_SCLP_CONSOLE=y -CONFIG_SCLP_VT220_TTY=y -CONFIG_SCLP_VT220_CONSOLE=y -CONFIG_SCLP_CPI=m -CONFIG_SCLP_ASYNC=m -CONFIG_S390_TAPE=m -CONFIG_S390_TAPE_BLOCK=y -CONFIG_S390_TAPE_34XX=m -CONFIG_ACCESSIBILITY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFSD=y -CONFIG_NFSD_V3=y +# CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DLM=m CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -# CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_MUTEXES=y +CONFIG_TIMER_STATS=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_LOCK_STAT=y +CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_SPINLOCK_SLEEP=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_NOTIFIERS=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_KPROBES_SANITY_TEST=y +CONFIG_CPU_NOTIFIER_ERROR_INJECT=m +CONFIG_LATENCYTOP=y CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_SAMPLES=y -CONFIG_CRYPTO_FIPS=y +CONFIG_DEBUG_PAGEALLOC=y +# CONFIG_FTRACE is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m +CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m +CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m +CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m +CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_ZCRYPT=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC32=m +CONFIG_CRYPTO_DES_S390=m +CONFIG_CRYPTO_AES_S390=m CONFIG_CRC7=m -CONFIG_KVM=m -CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_BALLOON=y diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile index b08d2abf617..2e671d5004c 100644 --- a/arch/s390/hypfs/Makefile +++ b/arch/s390/hypfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o -s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o +s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index fa487d4cc08..80c1526f2af 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -12,6 +12,8 @@ #include <linux/fs.h> #include <linux/types.h> #include <linux/debugfs.h> +#include <linux/workqueue.h> +#include <linux/kref.h> #define REG_FILE_MODE 0440 #define UPDATE_FILE_MODE 0220 @@ -38,6 +40,33 @@ extern int hypfs_vm_init(void); extern void hypfs_vm_exit(void); extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root); -/* Directory for debugfs files */ -extern struct dentry *hypfs_dbfs_dir; +/* debugfs interface */ +struct hypfs_dbfs_file; + +struct hypfs_dbfs_data { + void *buf; + void *buf_free_ptr; + size_t size; + struct hypfs_dbfs_file *dbfs_file;; + struct kref kref; +}; + +struct hypfs_dbfs_file { + const char *name; + int (*data_create)(void **data, void **data_free_ptr, + size_t *size); + void (*data_free)(const void *buf_free_ptr); + + /* Private data for hypfs_dbfs.c */ + struct hypfs_dbfs_data *data; + struct delayed_work data_free_work; + struct mutex lock; + struct dentry *dentry; +}; + +extern int hypfs_dbfs_init(void); +extern void hypfs_dbfs_exit(void); +extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df); +extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df); + #endif /* _HYPFS_H_ */ diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c new file mode 100644 index 00000000000..b478013b7fe --- /dev/null +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -0,0 +1,116 @@ +/* + * Hypervisor filesystem for Linux on s390 - debugfs interface + * + * Copyright (C) IBM Corp. 2010 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/slab.h> +#include "hypfs.h" + +static struct dentry *dbfs_dir; + +static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f) +{ + struct hypfs_dbfs_data *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + kref_init(&data->kref); + data->dbfs_file = f; + return data; +} + +static void hypfs_dbfs_data_free(struct kref *kref) +{ + struct hypfs_dbfs_data *data; + + data = container_of(kref, struct hypfs_dbfs_data, kref); + data->dbfs_file->data_free(data->buf_free_ptr); + kfree(data); +} + +static void data_free_delayed(struct work_struct *work) +{ + struct hypfs_dbfs_data *data; + struct hypfs_dbfs_file *df; + + df = container_of(work, struct hypfs_dbfs_file, data_free_work.work); + mutex_lock(&df->lock); + data = df->data; + df->data = NULL; + mutex_unlock(&df->lock); + kref_put(&data->kref, hypfs_dbfs_data_free); +} + +static ssize_t dbfs_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct hypfs_dbfs_data *data; + struct hypfs_dbfs_file *df; + ssize_t rc; + + if (*ppos != 0) + return 0; + + df = file->f_path.dentry->d_inode->i_private; + mutex_lock(&df->lock); + if (!df->data) { + data = hypfs_dbfs_data_alloc(df); + if (!data) { + mutex_unlock(&df->lock); + return -ENOMEM; + } + rc = df->data_create(&data->buf, &data->buf_free_ptr, + &data->size); + if (rc) { + mutex_unlock(&df->lock); + kfree(data); + return rc; + } + df->data = data; + schedule_delayed_work(&df->data_free_work, HZ); + } + data = df->data; + kref_get(&data->kref); + mutex_unlock(&df->lock); + + rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size); + kref_put(&data->kref, hypfs_dbfs_data_free); + return rc; +} + +static const struct file_operations dbfs_ops = { + .read = dbfs_read, + .llseek = no_llseek, +}; + +int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) +{ + df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, + &dbfs_ops); + if (IS_ERR(df->dentry)) + return PTR_ERR(df->dentry); + mutex_init(&df->lock); + INIT_DELAYED_WORK(&df->data_free_work, data_free_delayed); + return 0; +} + +void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) +{ + debugfs_remove(df->dentry); +} + +int hypfs_dbfs_init(void) +{ + dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); + if (IS_ERR(dbfs_dir)) + return PTR_ERR(dbfs_dir); + return 0; +} + +void hypfs_dbfs_exit(void) +{ + debugfs_remove(dbfs_dir); +} diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index cd4a81be9cf..6023c6dc1fb 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -555,81 +555,38 @@ struct dbfs_d204 { char buf[]; /* d204 buffer */ } __attribute__ ((packed)); -struct dbfs_d204_private { - struct dbfs_d204 *d204; /* Aligned d204 data with header */ - void *base; /* Base pointer (needed for vfree) */ -}; - -static int dbfs_d204_open(struct inode *inode, struct file *file) +static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) { - struct dbfs_d204_private *data; struct dbfs_d204 *d204; int rc, buf_size; + void *base; - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr); - data->base = vmalloc(buf_size); - if (!data->base) { - rc = -ENOMEM; - goto fail_kfree_data; + base = vmalloc(buf_size); + if (!base) + return -ENOMEM; + memset(base, 0, buf_size); + d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); + rc = diag204_do_store(d204->buf, diag204_buf_pages); + if (rc) { + vfree(base); + return rc; } - memset(data->base, 0, buf_size); - d204 = page_align_ptr(data->base + sizeof(d204->hdr)) - - sizeof(d204->hdr); - rc = diag204_do_store(&d204->buf, diag204_buf_pages); - if (rc) - goto fail_vfree_base; d204->hdr.version = DBFS_D204_HDR_VERSION; d204->hdr.len = PAGE_SIZE * diag204_buf_pages; d204->hdr.sc = diag204_store_sc; - data->d204 = d204; - file->private_data = data; - return nonseekable_open(inode, file); - -fail_vfree_base: - vfree(data->base); -fail_kfree_data: - kfree(data); - return rc; -} - -static int dbfs_d204_release(struct inode *inode, struct file *file) -{ - struct dbfs_d204_private *data = file->private_data; - - vfree(data->base); - kfree(data); + *data = d204; + *data_free_ptr = base; + *size = d204->hdr.len + sizeof(struct dbfs_d204_hdr); return 0; } -static ssize_t dbfs_d204_read(struct file *file, char __user *buf, - size_t size, loff_t *ppos) -{ - struct dbfs_d204_private *data = file->private_data; - - return simple_read_from_buffer(buf, size, ppos, data->d204, - data->d204->hdr.len + - sizeof(data->d204->hdr)); -} - -static const struct file_operations dbfs_d204_ops = { - .open = dbfs_d204_open, - .read = dbfs_d204_read, - .release = dbfs_d204_release, - .llseek = no_llseek, +static struct hypfs_dbfs_file dbfs_file_d204 = { + .name = "diag_204", + .data_create = dbfs_d204_create, + .data_free = vfree, }; -static int hypfs_dbfs_init(void) -{ - dbfs_d204_file = debugfs_create_file("diag_204", 0400, hypfs_dbfs_dir, - NULL, &dbfs_d204_ops); - if (IS_ERR(dbfs_d204_file)) - return PTR_ERR(dbfs_d204_file); - return 0; -} - __init int hypfs_diag_init(void) { int rc; @@ -639,7 +596,7 @@ __init int hypfs_diag_init(void) return -ENODATA; } if (diag204_info_type == INFO_EXT) { - rc = hypfs_dbfs_init(); + rc = hypfs_dbfs_create_file(&dbfs_file_d204); if (rc) return rc; } @@ -660,6 +617,7 @@ void hypfs_diag_exit(void) debugfs_remove(dbfs_d204_file); diag224_delete_name_table(); diag204_free_buffer(); + hypfs_dbfs_remove_file(&dbfs_file_d204); } /* diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 26cf177f6a3..e54796002f6 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -20,8 +20,6 @@ static char local_guest[] = " "; static char all_guests[] = "* "; static char *guest_query; -static struct dentry *dbfs_d2fc_file; - struct diag2fc_data { __u32 version; __u32 flags; @@ -104,7 +102,7 @@ static void *diag2fc_store(char *query, unsigned int *count, int offset) return data; } -static void diag2fc_free(void *data) +static void diag2fc_free(const void *data) { vfree(data); } @@ -239,43 +237,29 @@ struct dbfs_d2fc { char buf[]; /* d2fc buffer */ } __attribute__ ((packed)); -static int dbfs_d2fc_open(struct inode *inode, struct file *file) +static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) { - struct dbfs_d2fc *data; + struct dbfs_d2fc *d2fc; unsigned int count; - data = diag2fc_store(guest_query, &count, sizeof(data->hdr)); - if (IS_ERR(data)) - return PTR_ERR(data); - get_clock_ext(data->hdr.tod_ext); - data->hdr.len = count * sizeof(struct diag2fc_data); - data->hdr.version = DBFS_D2FC_HDR_VERSION; - data->hdr.count = count; - memset(&data->hdr.reserved, 0, sizeof(data->hdr.reserved)); - file->private_data = data; - return nonseekable_open(inode, file); -} - -static int dbfs_d2fc_release(struct inode *inode, struct file *file) -{ - diag2fc_free(file->private_data); + d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); + if (IS_ERR(d2fc)) + return PTR_ERR(d2fc); + get_clock_ext(d2fc->hdr.tod_ext); + d2fc->hdr.len = count * sizeof(struct diag2fc_data); + d2fc->hdr.version = DBFS_D2FC_HDR_VERSION; + d2fc->hdr.count = count; + memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved)); + *data = d2fc; + *data_free_ptr = d2fc; + *size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr); return 0; } -static ssize_t dbfs_d2fc_read(struct file *file, char __user *buf, - size_t size, loff_t *ppos) -{ - struct dbfs_d2fc *data = file->private_data; - - return simple_read_from_buffer(buf, size, ppos, data, data->hdr.len + - sizeof(struct dbfs_d2fc_hdr)); -} - -static const struct file_operations dbfs_d2fc_ops = { - .open = dbfs_d2fc_open, - .read = dbfs_d2fc_read, - .release = dbfs_d2fc_release, - .llseek = no_llseek, +static struct hypfs_dbfs_file dbfs_file_2fc = { + .name = "diag_2fc", + .data_create = dbfs_diag2fc_create, + .data_free = diag2fc_free, }; int hypfs_vm_init(void) @@ -288,18 +272,12 @@ int hypfs_vm_init(void) guest_query = local_guest; else return -EACCES; - - dbfs_d2fc_file = debugfs_create_file("diag_2fc", 0400, hypfs_dbfs_dir, - NULL, &dbfs_d2fc_ops); - if (IS_ERR(dbfs_d2fc_file)) - return PTR_ERR(dbfs_d2fc_file); - - return 0; + return hypfs_dbfs_create_file(&dbfs_file_2fc); } void hypfs_vm_exit(void) { if (!MACHINE_IS_VM) return; - debugfs_remove(dbfs_d2fc_file); + hypfs_dbfs_remove_file(&dbfs_file_2fc); } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 47cc446dab8..6fe874fc5f8 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -46,8 +46,6 @@ static const struct super_operations hypfs_s_ops; /* start of list of all dentries, which have to be deleted on update */ static struct dentry *hypfs_last_dentry; -struct dentry *hypfs_dbfs_dir; - static void hypfs_update_update(struct super_block *sb) { struct hypfs_sb_info *sb_info = sb->s_fs_info; @@ -471,13 +469,12 @@ static int __init hypfs_init(void) { int rc; - hypfs_dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - if (IS_ERR(hypfs_dbfs_dir)) - return PTR_ERR(hypfs_dbfs_dir); - + rc = hypfs_dbfs_init(); + if (rc) + return rc; if (hypfs_diag_init()) { rc = -ENODATA; - goto fail_debugfs_remove; + goto fail_dbfs_exit; } if (hypfs_vm_init()) { rc = -ENODATA; @@ -499,9 +496,8 @@ fail_hypfs_vm_exit: hypfs_vm_exit(); fail_hypfs_diag_exit: hypfs_diag_exit(); -fail_debugfs_remove: - debugfs_remove(hypfs_dbfs_dir); - +fail_dbfs_exit: + hypfs_dbfs_exit(); pr_err("Initialization of hypfs failed with rc=%i\n", rc); return rc; } @@ -510,7 +506,7 @@ static void __exit hypfs_exit(void) { hypfs_diag_exit(); hypfs_vm_exit(); - debugfs_remove(hypfs_dbfs_dir); + hypfs_dbfs_exit(); unregister_filesystem(&hypfs_type); kobject_put(s390_kobj); } diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index e8501115eca..ff6f62e0ec3 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -204,6 +204,8 @@ int ccw_device_tm_start_timeout(struct ccw_device *, struct tcw *, unsigned long, u8, int); int ccw_device_tm_intrg(struct ccw_device *cdev); +int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask); + extern int ccw_device_set_online(struct ccw_device *cdev); extern int ccw_device_set_offline(struct ccw_device *cdev); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 40e2ab0fa3f..08143487829 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -202,7 +202,7 @@ static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock, static inline int s390_nohz_delay(int cpu) { - return per_cpu(s390_idle, cpu).nohz_delay != 0; + return __get_cpu_var(s390_idle).nohz_delay != 0; } #define arch_needs_cpu(cpu) s390_nohz_delay(cpu) diff --git a/arch/s390/include/asm/dasd.h b/arch/s390/include/asm/dasd.h index b604a9186f8..0be28efe5b6 100644 --- a/arch/s390/include/asm/dasd.h +++ b/arch/s390/include/asm/dasd.h @@ -73,6 +73,7 @@ typedef struct dasd_information2_t { * 0x02: use diag discipline (diag) * 0x04: set the device initially online (internal use only) * 0x08: enable ERP related logging + * 0x20: give access to raw eckd data */ #define DASD_FEATURE_DEFAULT 0x00 #define DASD_FEATURE_READONLY 0x01 @@ -80,6 +81,8 @@ typedef struct dasd_information2_t { #define DASD_FEATURE_INITIAL_ONLINE 0x04 #define DASD_FEATURE_ERPLOG 0x08 #define DASD_FEATURE_FAILFAST 0x10 +#define DASD_FEATURE_FAILONSLCK 0x20 +#define DASD_FEATURE_USERAW 0x40 #define DASD_PARTN_BITS 2 diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 96c14a9102b..3c29be4836e 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -4,20 +4,17 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); -extern unsigned long ftrace_dyn_func; struct dyn_arch_ftrace { }; #define MCOUNT_ADDR ((long)_mcount) #ifdef CONFIG_64BIT -#define MCOUNT_OFFSET_RET 18 -#define MCOUNT_INSN_SIZE 24 -#define MCOUNT_OFFSET 14 -#else -#define MCOUNT_OFFSET_RET 26 -#define MCOUNT_INSN_SIZE 30 +#define MCOUNT_INSN_SIZE 12 #define MCOUNT_OFFSET 8 +#else +#define MCOUNT_INSN_SIZE 20 +#define MCOUNT_OFFSET 4 #endif static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 881d94590ae..e4155d3eb2c 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -21,20 +21,4 @@ #define HARDIRQ_BITS 8 -void clock_comparator_work(void); - -static inline unsigned long long local_tick_disable(void) -{ - unsigned long long old; - - old = S390_lowcore.clock_comparator; - S390_lowcore.clock_comparator = -1ULL; - return old; -} - -static inline void local_tick_enable(unsigned long long comp) -{ - S390_lowcore.clock_comparator = comp; -} - #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 7da991a858f..db14a311f1d 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -1,23 +1,33 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H -#ifdef __KERNEL__ #include <linux/hardirq.h> -/* - * the definition of irqs has changed in 2.5.46: - * NR_IRQS is no longer the number of i/o - * interrupts (65536), but rather the number - * of interrupt classes (2). - * Only external and i/o interrupts make much sense here (CH). - */ - enum interruption_class { EXTERNAL_INTERRUPT, IO_INTERRUPT, - + EXTINT_CLK, + EXTINT_IPI, + EXTINT_TMR, + EXTINT_TLA, + EXTINT_PFL, + EXTINT_DSD, + EXTINT_VRT, + EXTINT_SCP, + EXTINT_IUC, + IOINT_QAI, + IOINT_QDI, + IOINT_DAS, + IOINT_C15, + IOINT_C70, + IOINT_TAP, + IOINT_VMR, + IOINT_LCS, + IOINT_CLW, + IOINT_CTC, + IOINT_APB, + NMI_NMI, NR_IRQS, }; -#endif /* __KERNEL__ */ -#endif +#endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 330f68caffe..a231a9439c4 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -31,7 +31,6 @@ #include <linux/ptrace.h> #include <linux/percpu.h> -#define __ARCH_WANT_KPROBES_INSN_SLOT struct pt_regs; struct kprobe; @@ -58,23 +57,12 @@ typedef u16 kprobe_opcode_t; /* Architecture specific copy of original instruction */ struct arch_specific_insn { /* copy of original instruction */ - kprobe_opcode_t *insn; - int fixup; - int ilen; - int reg; + kprobe_opcode_t insn[MAX_INSN_SIZE]; }; -struct ins_replace_args { - kprobe_opcode_t *ptr; - kprobe_opcode_t old; - kprobe_opcode_t new; -}; struct prev_kprobe { struct kprobe *kp; unsigned long status; - unsigned long saved_psw; - unsigned long kprobe_saved_imask; - unsigned long kprobe_saved_ctl[3]; }; /* per-cpu kprobe control block */ @@ -82,17 +70,13 @@ struct kprobe_ctlblk { unsigned long kprobe_status; unsigned long kprobe_saved_imask; unsigned long kprobe_saved_ctl[3]; - struct pt_regs jprobe_saved_regs; - unsigned long jprobe_saved_r14; - unsigned long jprobe_saved_r15; struct prev_kprobe prev_kprobe; + struct pt_regs jprobe_saved_regs; kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; }; void arch_remove_kprobe(struct kprobe *p); void kretprobe_trampoline(void); -int is_prohibited_opcode(kprobe_opcode_t *instruction); -void get_instruction_type(struct arch_specific_insn *ainsn); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h index 458c1f7fbc1..688271f5f2e 100644 --- a/arch/s390/include/asm/mutex.h +++ b/arch/s390/include/asm/mutex.h @@ -7,3 +7,5 @@ */ #include <asm-generic/mutex-dec.h> + +#define arch_mutex_cpu_relax() barrier() diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8d6f8716957..bf3de04170a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -32,7 +32,6 @@ static inline void get_cpu_id(struct cpuid *ptr) } extern void s390_adjust_jiffies(void); -extern void print_cpu_info(void); extern int get_cpu_capability(unsigned int *); /* @@ -81,7 +80,8 @@ struct thread_struct { mm_segment_t mm_segment; unsigned long prot_addr; /* address of protection-excep. */ unsigned int trap_no; - per_struct per_info; + struct per_regs per_user; /* User specified PER registers */ + struct per_event per_event; /* Cause of the last PER trap */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; }; diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index d9d42b1e46f..9ad628a8574 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -331,10 +331,60 @@ struct pt_regs unsigned short ilc; unsigned short svcnr; }; + +/* + * Program event recording (PER) register set. + */ +struct per_regs { + unsigned long control; /* PER control bits */ + unsigned long start; /* PER starting address */ + unsigned long end; /* PER ending address */ +}; + +/* + * PER event contains information about the cause of the last PER exception. + */ +struct per_event { + unsigned short cause; /* PER code, ATMID and AI */ + unsigned long address; /* PER address */ + unsigned char paid; /* PER access identification */ +}; + +/* + * Simplified per_info structure used to decode the ptrace user space ABI. + */ +struct per_struct_kernel { + unsigned long cr9; /* PER control bits */ + unsigned long cr10; /* PER starting address */ + unsigned long cr11; /* PER ending address */ + unsigned long bits; /* Obsolete software bits */ + unsigned long starting_addr; /* User specified start address */ + unsigned long ending_addr; /* User specified end address */ + unsigned short perc_atmid; /* PER trap ATMID */ + unsigned long address; /* PER trap instruction address */ + unsigned char access_id; /* PER trap access identification */ +}; + +#define PER_EVENT_MASK 0xE9000000UL + +#define PER_EVENT_BRANCH 0x80000000UL +#define PER_EVENT_IFETCH 0x40000000UL +#define PER_EVENT_STORE 0x20000000UL +#define PER_EVENT_STORE_REAL 0x08000000UL +#define PER_EVENT_NULLIFICATION 0x01000000UL + +#define PER_CONTROL_MASK 0x00a00000UL + +#define PER_CONTROL_BRANCH_ADDRESS 0x00800000UL +#define PER_CONTROL_ALTERATION 0x00200000UL + #endif /* - * Now for the program event recording (trace) definitions. + * Now for the user space program event recording (trace) definitions. + * The following structures are used only for the ptrace interface, don't + * touch or even look at it if you don't want to modify the user-space + * ptrace interface. In particular stay away from it for in-kernel PER. */ typedef struct { diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 46e96bc1f5a..350e7ee5952 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -361,6 +361,7 @@ struct qdio_initialize { qdio_handler_t *input_handler; qdio_handler_t *output_handler; void (*queue_start_poll) (struct ccw_device *, int, unsigned long); + int scan_threshold; unsigned long int_parm; void **input_sbal_addr_array; void **output_sbal_addr_array; diff --git a/arch/s390/include/asm/qeth.h b/arch/s390/include/asm/qeth.h index 06cbd1e8c94..90efda0b137 100644 --- a/arch/s390/include/asm/qeth.h +++ b/arch/s390/include/asm/qeth.h @@ -28,39 +28,70 @@ struct qeth_arp_cache_entry { __u8 reserved2[32]; } __attribute__ ((packed)); +enum qeth_arp_ipaddrtype { + QETHARP_IP_ADDR_V4 = 1, + QETHARP_IP_ADDR_V6 = 2, +}; +struct qeth_arp_entrytype { + __u8 mac; + __u8 ip; +} __attribute__((packed)); + +#define QETH_QARP_MEDIASPECIFIC_BYTES 32 +#define QETH_QARP_MACADDRTYPE_BYTES 1 struct qeth_arp_qi_entry7 { - __u8 media_specific[32]; - __u8 macaddr_type; - __u8 ipaddr_type; + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; __u8 macaddr[6]; __u8 ipaddr[4]; } __attribute__((packed)); +struct qeth_arp_qi_entry7_ipv6 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[16]; +} __attribute__((packed)); + struct qeth_arp_qi_entry7_short { - __u8 macaddr_type; - __u8 ipaddr_type; + struct qeth_arp_entrytype type; __u8 macaddr[6]; __u8 ipaddr[4]; } __attribute__((packed)); +struct qeth_arp_qi_entry7_short_ipv6 { + struct qeth_arp_entrytype type; + __u8 macaddr[6]; + __u8 ipaddr[16]; +} __attribute__((packed)); + struct qeth_arp_qi_entry5 { - __u8 media_specific[32]; - __u8 macaddr_type; - __u8 ipaddr_type; + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; __u8 ipaddr[4]; } __attribute__((packed)); +struct qeth_arp_qi_entry5_ipv6 { + __u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES]; + struct qeth_arp_entrytype type; + __u8 ipaddr[16]; +} __attribute__((packed)); + struct qeth_arp_qi_entry5_short { - __u8 macaddr_type; - __u8 ipaddr_type; + struct qeth_arp_entrytype type; __u8 ipaddr[4]; } __attribute__((packed)); +struct qeth_arp_qi_entry5_short_ipv6 { + struct qeth_arp_entrytype type; + __u8 ipaddr[16]; +} __attribute__((packed)); /* * can be set by user if no "media specific information" is wanted * -> saves a lot of space in user space buffer */ #define QETH_QARP_STRIP_ENTRIES 0x8000 +#define QETH_QARP_WITH_IPV6 0x4000 #define QETH_QARP_REQUEST_MASK 0x00ff /* data sent to user space as result of query arp ioctl */ diff --git a/arch/s390/include/asm/s390_ext.h b/arch/s390/include/asm/s390_ext.h index 1a9307e7084..080876d5f19 100644 --- a/arch/s390/include/asm/s390_ext.h +++ b/arch/s390/include/asm/s390_ext.h @@ -1,32 +1,17 @@ -#ifndef _S390_EXTINT_H -#define _S390_EXTINT_H - /* - * include/asm-s390/s390_ext.h - * - * S390 version - * Copyright IBM Corp. 1999,2007 - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) + * Copyright IBM Corp. 1999,2010 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#ifndef _S390_EXTINT_H +#define _S390_EXTINT_H + #include <linux/types.h> typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); -typedef struct ext_int_info_t { - struct ext_int_info_t *next; - ext_int_handler_t handler; - __u16 code; -} ext_int_info_t; - -extern ext_int_info_t *ext_int_hash[]; - int register_external_interrupt(__u16 code, ext_int_handler_t handler); -int register_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *info); int unregister_external_interrupt(__u16 code, ext_int_handler_t handler); -int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *info); -#endif +#endif /* _S390_EXTINT_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index edc03cb9cd7..045e009fc16 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -20,7 +20,6 @@ extern void machine_power_off_smp(void); extern int __cpu_disable (void); extern void __cpu_die (unsigned int cpu); -extern void cpu_die (void) __attribute__ ((noreturn)); extern int __cpu_up (unsigned int cpu); extern struct mutex smp_cpu_state_mutex; @@ -71,8 +70,10 @@ static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); +extern void __noreturn cpu_die(void); #else static inline int smp_rescan_cpus(void) { return 0; } +static inline void cpu_die(void) { } #endif #endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 3ad16dbf622..6710b0eac16 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -20,6 +20,7 @@ struct task_struct; extern struct task_struct *__switch_to(void *, void *); +extern void update_per_regs(struct task_struct *task); static inline void save_fp_regs(s390_fp_regs *fpregs) { @@ -93,6 +94,7 @@ static inline void restore_access_regs(unsigned int *acrs) if (next->mm) { \ restore_fp_regs(&next->thread.fp_regs); \ restore_access_regs(&next->thread.acrs[0]); \ + update_per_regs(next); \ } \ prev = __switch_to(prev,next); \ } while (0) @@ -101,11 +103,9 @@ extern void account_vtime(struct task_struct *, struct task_struct *); extern void account_tick_vtime(struct task_struct *); #ifdef CONFIG_PFAULT -extern void pfault_irq_init(void); extern int pfault_init(void); extern void pfault_fini(void); #else /* CONFIG_PFAULT */ -#define pfault_irq_init() do { } while (0) #define pfault_init() ({-1;}) #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 5baf0230b29..ebc77091466 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -74,7 +74,7 @@ struct thread_info { /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *)(S390_lowcore.kernel_stack - THREAD_SIZE); + return (struct thread_info *) S390_lowcore.thread_info; } #define THREAD_SIZE_ORDER THREAD_ORDER @@ -88,7 +88,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTART_SVC 4 /* restart svc with new svc number */ -#define TIF_SINGLE_STEP 6 /* deliver sigtrap on return to user */ +#define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ @@ -99,14 +99,15 @@ static inline struct thread_info *current_thread_info(void) #define TIF_31BIT 17 /* 32bit process */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */ -#define TIF_FREEZE 20 /* thread is freezing for suspend */ +#define TIF_SINGLE_STEP 20 /* This task is single stepped */ +#define TIF_FREEZE 21 /* thread is freezing for suspend */ #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_RESTART_SVC (1<<TIF_RESTART_SVC) -#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) +#define _TIF_PER_TRAP (1<<TIF_PER_TRAP) #define _TIF_MCCK_PENDING (1<<TIF_MCCK_PENDING) #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) @@ -114,6 +115,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_31BIT (1<<TIF_31BIT) +#define _TIF_SINGLE_STEP (1<<TIF_FREEZE) #define _TIF_FREEZE (1<<TIF_FREEZE) #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 09d345a701d..88829a40af6 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -11,6 +11,8 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H +#include <asm/lowcore.h> + /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -49,6 +51,24 @@ static inline void store_clock_comparator(__u64 *time) asm volatile("stckc %0" : "=Q" (*time)); } +void clock_comparator_work(void); + +static inline unsigned long long local_tick_disable(void) +{ + unsigned long long old; + + old = S390_lowcore.clock_comparator; + S390_lowcore.clock_comparator = -1ULL; + set_clock_comparator(S390_lowcore.clock_comparator); + return old; +} + +static inline void local_tick_enable(unsigned long long comp) +{ + S390_lowcore.clock_comparator = comp; + set_clock_comparator(S390_lowcore.clock_comparator); +} + #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ typedef unsigned long long cycles_t; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 33982e7ce04..fe03c140002 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -23,14 +23,16 @@ int main(void) { DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); - DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info)); DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment)); BLANK(); DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); - DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid)); - DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address)); - DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id)); + DEFINE(__THREAD_per_cause, + offsetof(struct task_struct, thread.per_event.cause)); + DEFINE(__THREAD_per_address, + offsetof(struct task_struct, thread.per_event.address)); + DEFINE(__THREAD_per_paid, + offsetof(struct task_struct, thread.per_event.paid)); BLANK(); DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); @@ -85,9 +87,9 @@ int main(void) DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); - DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_perc_atmid)); + DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid)); DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); - DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id)); + DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id)); DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id)); DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h index 3141025724f..12b82383351 100644 --- a/arch/s390/kernel/compat_ptrace.h +++ b/arch/s390/kernel/compat_ptrace.h @@ -4,40 +4,19 @@ #include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ #include "compat_linux.h" /* needed for psw_compat_t */ -typedef struct { - __u32 cr[NUM_CR_WORDS]; -} per_cr_words32; - -typedef struct { - __u16 perc_atmid; /* 0x096 */ - __u32 address; /* 0x098 */ - __u8 access_id; /* 0x0a1 */ -} per_lowcore_words32; - -typedef struct { - union { - per_cr_words32 words; - } control_regs; - /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing - */ - unsigned single_step : 1; - unsigned instruction_fetch : 1; - unsigned : 30; - /* - * These addresses are copied into cr10 & cr11 if single - * stepping is switched off - */ - __u32 starting_addr; - __u32 ending_addr; - union { - per_lowcore_words32 words; - } lowcore; -} per_struct32; +struct compat_per_struct_kernel { + __u32 cr9; /* PER control bits */ + __u32 cr10; /* PER starting address */ + __u32 cr11; /* PER ending address */ + __u32 bits; /* Obsolete software bits */ + __u32 starting_addr; /* User specified start address */ + __u32 ending_addr; /* User specified end address */ + __u16 perc_atmid; /* PER trap ATMID */ + __u32 address; /* PER trap instruction address */ + __u8 access_id; /* PER trap access identification */ +}; -struct user_regs_struct32 +struct compat_user_regs_struct { psw_compat_t psw; u32 gprs[NUM_GPRS]; @@ -50,14 +29,14 @@ struct user_regs_struct32 * itself as there is no "official" ptrace interface for hardware * watchpoints. This is the way intel does it. */ - per_struct32 per_info; + struct compat_per_struct_kernel per_info; u32 ieee_instruction_pointer; /* obsolete, always 0 */ }; -struct user32 { +struct compat_user { /* We start with the registers, to mimic the way that "memory" is returned from the ptrace(3,...) function. */ - struct user_regs_struct32 regs; /* Where the registers are actually stored */ + struct compat_user_regs_struct regs; /* The rest of this junk is to help gdb figure out what goes where */ u32 u_tsize; /* Text segment size (pages). */ u32 u_dsize; /* Data segment size (pages). */ @@ -79,6 +58,6 @@ typedef struct __u32 len; __u32 kernel_addr; __u32 process_addr; -} ptrace_area_emu31; +} compat_ptrace_area; #endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1ecc337fb67..648f64239a9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -9,7 +9,6 @@ * Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/sys.h> #include <linux/linkage.h> #include <linux/init.h> #include <asm/cache.h> @@ -49,7 +48,7 @@ SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) + _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ @@ -110,31 +109,36 @@ STACK_SIZE = 1 << STACK_SHIFT 1: stm %r10,%r11,\lc_sum .endm - .macro SAVE_ALL_BASE savearea + .macro SAVE_ALL_SVC psworg,savearea stm %r12,%r15,\savearea l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 + l %r15,__LC_KERNEL_STACK # problem state -> load ksp + s %r15,BASED(.Lc_spsize) # make room for registers & psw .endm - .macro SAVE_ALL_SVC psworg,savearea - la %r12,\psworg - l %r15,__LC_KERNEL_STACK # problem state -> load ksp + .macro SAVE_ALL_BASE savearea + stm %r12,%r15,\savearea + l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 .endm - .macro SAVE_ALL_SYNC psworg,savearea - la %r12,\psworg + .macro SAVE_ALL_PGM psworg,savearea tm \psworg+1,0x01 # test problem state bit - bz BASED(2f) # skip stack setup save - l %r15,__LC_KERNEL_STACK # problem state -> load ksp #ifdef CONFIG_CHECK_STACK - b BASED(3f) -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bz BASED(stack_overflow) -3: + bnz BASED(1f) + tml %r15,STACK_SIZE - CONFIG_STACK_GUARD + bnz BASED(2f) + la %r12,\psworg + b BASED(stack_overflow) +#else + bz BASED(2f) #endif -2: +1: l %r15,__LC_KERNEL_STACK # problem state -> load ksp +2: s %r15,BASED(.Lc_spsize) # make room for registers & psw .endm .macro SAVE_ALL_ASYNC psworg,savearea + stm %r12,%r15,\savearea + l %r13,__LC_SVC_NEW_PSW+4 # load &system_call to %r13 la %r12,\psworg tm \psworg+1,0x01 # test problem state bit bnz BASED(1f) # from user -> load async stack @@ -149,27 +153,23 @@ STACK_SIZE = 1 << STACK_SHIFT 0: l %r14,__LC_ASYNC_STACK # are we already on the async stack ? slr %r14,%r15 sra %r14,STACK_SHIFT - be BASED(2f) -1: l %r15,__LC_ASYNC_STACK #ifdef CONFIG_CHECK_STACK - b BASED(3f) -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - bz BASED(stack_overflow) -3: + bnz BASED(1f) + tml %r15,STACK_SIZE - CONFIG_STACK_GUARD + bnz BASED(2f) + b BASED(stack_overflow) +#else + bz BASED(2f) #endif -2: +1: l %r15,__LC_ASYNC_STACK +2: s %r15,BASED(.Lc_spsize) # make room for registers & psw .endm - .macro CREATE_STACK_FRAME psworg,savearea - s %r15,BASED(.Lc_spsize) # make room for registers & psw - mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack + .macro CREATE_STACK_FRAME savearea + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - icm %r12,12,__LC_SVC_ILC - stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - st %r12,SP_ILC(%r15) mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack - la %r12,0 - st %r12,__SF_BACKCHAIN(%r15) # clear back chain + stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack .endm .macro RESTORE_ALL psworg,sync @@ -188,6 +188,8 @@ STACK_SIZE = 1 << STACK_SHIFT ssm __SF_EMPTY(%r15) .endm + .section .kprobes.text, "ax" + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -198,31 +200,21 @@ STACK_SIZE = 1 << STACK_SHIFT .globl __switch_to __switch_to: basr %r1,0 -__switch_to_base: - tm __THREAD_per(%r3),0xe8 # new process is using per ? - bz __switch_to_noper-__switch_to_base(%r1) # if not we're fine - stctl %c9,%c11,__SF_EMPTY(%r15) # We are using per stuff - clc __THREAD_per(12,%r3),__SF_EMPTY(%r15) - be __switch_to_noper-__switch_to_base(%r1) # we got away w/o bashing TLB's - lctl %c9,%c11,__THREAD_per(%r3) # Nope we didn't -__switch_to_noper: - l %r4,__THREAD_info(%r2) # get thread_info of prev +0: l %r4,__THREAD_info(%r2) # get thread_info of prev + l %r5,__THREAD_info(%r3) # get thread_info of next tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? - bz __switch_to_no_mcck-__switch_to_base(%r1) - ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - l %r4,__THREAD_info(%r3) # get thread_info of next - oi __TI_flags+3(%r4),_TIF_MCCK_PENDING # set it in next -__switch_to_no_mcck: - stm %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task - st %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp - l %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp - lm %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task - st %r3,__LC_CURRENT # __LC_CURRENT = current task struct - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - l %r3,__THREAD_info(%r3) # load thread_info from task struct - st %r3,__LC_THREAD_INFO - ahi %r3,STACK_SIZE - st %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack + bz 1f-0b(%r1) + ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + oi __TI_flags+3(%r5),_TIF_MCCK_PENDING # set it in next +1: stm %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + st %r15,__THREAD_ksp(%r2) # store kernel stack of prev + l %r15,__THREAD_ksp(%r3) # load kernel stack of next + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + st %r3,__LC_CURRENT # store task struct of next + st %r5,__LC_THREAD_INFO # store thread info of next + ahi %r5,STACK_SIZE # end of kernel stack of next + st %r5,__LC_KERNEL_STACK # store end of kernel stack br %r14 __critical_start: @@ -235,10 +227,11 @@ __critical_start: system_call: stpt __LC_SYNC_ENTER_TIMER sysc_saveall: - SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - lh %r7,0x8a # get svc number from lowcore + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_ILC(4,%r15),__LC_SVC_ILC + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -246,20 +239,20 @@ sysc_stime: sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER sysc_do_svc: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - ltr %r7,%r7 # test for svc 0 + xr %r7,%r7 + icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 bnz BASED(sysc_nr_ok) # svc number > 0 # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) bnl BASED(sysc_nr_ok) + sth %r1,SP_SVCNR(%r15) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: - sth %r7,SP_SVCNR(%r15) sll %r7,2 # svc number *4 - l %r8,BASED(.Lsysc_table) - tm __TI_flags+2(%r9),_TIF_SYSCALL + l %r10,BASED(.Lsysc_table) + tm __TI_flags+2(%r12),_TIF_SYSCALL mvc SP_ARGS(4,%r15),SP_R7(%r15) - l %r8,0(%r7,%r8) # get system call addr. + l %r8,0(%r7,%r10) # get system call addr. bnz BASED(sysc_tracesys) basr %r14,%r8 # call sys_xxxx st %r2,SP_R2(%r15) # store return value (change R2 on stack) @@ -267,7 +260,7 @@ sysc_nr_ok: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: - tm __TI_flags+3(%r9),_TIF_WORK_SVC + tm __TI_flags+3(%r12),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 @@ -284,17 +277,17 @@ sysc_work: # One of the work bits is on. Find out which one. # sysc_work_tif: - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + tm __TI_flags+3(%r12),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED bo BASED(sysc_reschedule) - tm __TI_flags+3(%r9),_TIF_SIGPENDING + tm __TI_flags+3(%r12),_TIF_SIGPENDING bo BASED(sysc_sigpending) - tm __TI_flags+3(%r9),_TIF_NOTIFY_RESUME + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME bo BASED(sysc_notify_resume) - tm __TI_flags+3(%r9),_TIF_RESTART_SVC + tm __TI_flags+3(%r12),_TIF_RESTART_SVC bo BASED(sysc_restart) - tm __TI_flags+3(%r9),_TIF_SINGLE_STEP + tm __TI_flags+3(%r12),_TIF_PER_TRAP bo BASED(sysc_singlestep) b BASED(sysc_return) # beware of critical section cleanup @@ -318,13 +311,13 @@ sysc_mcck_pending: # _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: - ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP + ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r9),_TIF_RESTART_SVC + tm __TI_flags+3(%r12),_TIF_RESTART_SVC bo BASED(sysc_restart) - tm __TI_flags+3(%r9),_TIF_SINGLE_STEP + tm __TI_flags+3(%r12),_TIF_PER_TRAP bo BASED(sysc_singlestep) b BASED(sysc_return) @@ -342,23 +335,23 @@ sysc_notify_resume: # _TIF_RESTART_SVC is set, set up registers and restart svc # sysc_restart: - ni __TI_flags+3(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC + ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC l %r7,SP_R2(%r15) # load new svc number mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument lm %r2,%r6,SP_R2(%r15) # load svc arguments + sth %r7,SP_SVCNR(%r15) b BASED(sysc_nr_ok) # restart svc # -# _TIF_SINGLE_STEP is set, call do_single_step +# _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP - mvi SP_SVCNR(%r15),0xff # set trap indication to pgm check - mvi SP_SVCNR+1(%r15),0xff + ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP + xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return - br %r1 # branch to do_single_step + br %r1 # branch to do_per_trap # # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before @@ -368,15 +361,15 @@ sysc_tracesys: l %r1,BASED(.Ltrace_entry) la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 - srl %r7,2 - st %r7,SP_R2(%r15) + xr %r0,%r0 + icm %r0,3,SP_SVCNR(%r15) + st %r0,SP_R2(%r15) basr %r14,%r1 cl %r2,BASED(.Lnr_syscalls) bnl BASED(sysc_tracenogo) - l %r8,BASED(.Lsysc_table) lr %r7,%r2 sll %r7,2 # svc number *4 - l %r8,0(%r7,%r8) + l %r8,0(%r7,%r10) sysc_tracego: lm %r3,%r6,SP_R3(%r15) mvc SP_ARGS(4,%r15),SP_R7(%r15) @@ -384,7 +377,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+2(%r9),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_SYSCALL bz BASED(sysc_return) l %r1,BASED(.Ltrace_exit) la %r2,SP_PTREGS(%r15) # load pt_regs @@ -397,7 +390,7 @@ sysc_tracenogo: .globl ret_from_fork ret_from_fork: l %r13,__LC_SVC_NEW_PSW+4 - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? bo BASED(0f) st %r15,SP_R15(%r15) # store stack pointer for new kthread @@ -432,8 +425,8 @@ kernel_execve: 0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts l %r15,__LC_KERNEL_STACK # load ksp s %r15,BASED(.Lc_spsize) # make room for registers & psw - l %r9,__LC_THREAD_INFO mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs + l %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) stosm __SF_EMPTY(%r15),0x03 # reenable interrupts l %r1,BASED(.Lexecve_tail) @@ -463,26 +456,27 @@ pgm_check_handler: SAVE_ALL_BASE __LC_SAVE_AREA tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception bnz BASED(pgm_per) # got per exception -> special case - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA + SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA + CREATE_STACK_FRAME __LC_SAVE_AREA + xc SP_ILC(4,%r15),SP_ILC(%r15) + mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct l %r3,__LC_PGM_ILC # load program interruption code l %r4,__LC_TRANS_EXC_CODE REENABLE_IRQS la %r8,0x7f nr %r8,%r3 -pgm_do_call: - l %r7,BASED(.Ljump_table) sll %r8,2 - l %r7,0(%r8,%r7) # load address of handler routine + l %r1,BASED(.Ljump_table) + l %r1,0(%r8,%r1) # load address of handler routine la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r7 # branch to interrupt-handler + basr %r14,%r1 # branch to interrupt-handler pgm_exit: b BASED(sysc_return) @@ -503,33 +497,34 @@ pgm_per: # Normal per exception # pgm_per_std: - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA + SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime2) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - l %r1,__TI_task(%r9) + l %r1,__TI_task(%r12) tm SP_PSW+1(%r15),0x01 # kernel per event ? bz BASED(kernel_per) - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID + oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP l %r3,__LC_PGM_ILC # load program interruption code l %r4,__LC_TRANS_EXC_CODE REENABLE_IRQS la %r8,0x7f nr %r8,%r3 # clear per-event-bit and ilc be BASED(pgm_exit2) # only per or per+check ? - l %r7,BASED(.Ljump_table) sll %r8,2 - l %r7,0(%r8,%r7) # load address of handler routine + l %r1,BASED(.Ljump_table) + l %r1,0(%r8,%r1) # load address of handler routine la %r2,SP_PTREGS(%r15) # address of register-save area - basr %r14,%r7 # branch to interrupt-handler + basr %r14,%r1 # branch to interrupt-handler pgm_exit2: b BASED(sysc_return) @@ -537,18 +532,19 @@ pgm_exit2: # it was a single stepped SVC that is causing all the trouble # pgm_svcper: - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA + SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_ILC(4,%r15),__LC_SVC_ILC + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - lh %r7,0x8a # get svc number from lowcore - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - l %r8,__TI_task(%r9) - mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(4,%r8),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID - oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + l %r8,__TI_task(%r12) + mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE + mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS + mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID + oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) @@ -558,8 +554,7 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - mvi SP_SVCNR(%r15),0xff # set trap indication to pgm check - mvi SP_SVCNR+1(%r15),0xff + xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -573,9 +568,10 @@ kernel_per: io_int_handler: stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 + CREATE_STACK_FRAME __LC_SAVE_AREA+16 + mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(io_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER @@ -583,7 +579,6 @@ io_int_handler: mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER io_no_vtime: TRACE_IRQS_OFF - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to standard irq handler @@ -591,7 +586,7 @@ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: - tm __TI_flags+3(%r9),_TIF_WORK_INT + tm __TI_flags+3(%r12),_TIF_WORK_INT bnz BASED(io_work) # there is work to do (signals etc.) io_restore: RESTORE_ALL __LC_RETURN_PSW,0 @@ -609,9 +604,9 @@ io_work: bo BASED(io_work_user) # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling - icm %r0,15,__TI_precount(%r9) + icm %r0,15,__TI_precount(%r12) bnz BASED(io_restore) # preemption disabled - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED bno BASED(io_restore) # switch to kernel stack l %r1,SP_R15(%r15) @@ -645,13 +640,13 @@ io_work_user: # and _TIF_MCCK_PENDING # io_work_tif: - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + tm __TI_flags+3(%r12),_TIF_MCCK_PENDING bo BASED(io_mcck_pending) - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED + tm __TI_flags+3(%r12),_TIF_NEED_RESCHED bo BASED(io_reschedule) - tm __TI_flags+3(%r9),_TIF_SIGPENDING + tm __TI_flags+3(%r12),_TIF_SIGPENDING bo BASED(io_sigpending) - tm __TI_flags+3(%r9),_TIF_NOTIFY_RESUME + tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME bo BASED(io_notify_resume) b BASED(io_return) # beware of critical section cleanup @@ -711,16 +706,16 @@ io_notify_resume: ext_int_handler: stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 - CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 + CREATE_STACK_FRAME __LC_SAVE_AREA+16 + mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(ext_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER ext_no_vtime: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code @@ -775,7 +770,10 @@ mcck_int_main: sra %r14,PAGE_SHIFT be BASED(0f) l %r15,__LC_PANIC_STACK # load panic stack -0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32 +0: s %r15,BASED(.Lc_spsize) # make room for registers & psw + CREATE_STACK_FRAME __LC_SAVE_AREA+32 + mvc SP_PSW(8,%r15),0(%r12) + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? bno BASED(mcck_no_vtime) # no -> skip cleanup critical tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -784,7 +782,6 @@ mcck_int_main: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER mcck_no_vtime: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ls390_mcck) basr %r14,%r1 # call machine check handler @@ -796,7 +793,7 @@ mcck_no_vtime: xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain lr %r15,%r1 stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + tm __TI_flags+3(%r12),_TIF_MCCK_PENDING bno BASED(mcck_return) TRACE_IRQS_OFF l %r1,BASED(.Ls390_handle_mcck) @@ -861,6 +858,8 @@ restart_crash: restart_go: #endif + .section .kprobes.text, "ax" + #ifdef CONFIG_CHECK_STACK /* * The synchronous or the asynchronous stack overflowed. We are dead. @@ -943,12 +942,13 @@ cleanup_system_call: bh BASED(0f) mvc __LC_SAVE_AREA(16),0(%r12) 0: st %r13,4(%r12) - st %r12,__LC_SAVE_AREA+48 # argh - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - l %r12,__LC_SAVE_AREA+48 # argh + l %r15,__LC_KERNEL_STACK # problem state -> load ksp + s %r15,BASED(.Lc_spsize) # make room for registers & psw st %r15,12(%r12) - lh %r7,0x8a + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW + mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc 0(4,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) bhe BASED(cleanup_stime) @@ -1046,7 +1046,7 @@ cleanup_io_restore_insn: .Ldo_signal: .long do_signal .Ldo_notify_resume: .long do_notify_resume -.Lhandle_per: .long do_single_step +.Lhandle_per: .long do_per_trap .Ldo_execve: .long do_execve .Lexecve_tail: .long execve_tail .Ljump_table: .long pgm_check_table diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 95c1dfc4ef3..17a6f83a2d6 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -12,7 +12,7 @@ pgm_check_handler_t do_dat_exception; extern int sysctl_userprocess_debug; -void do_single_step(struct pt_regs *regs); +void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 8f3e802174d..9d3603d6c51 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -51,7 +51,7 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) + _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ @@ -197,6 +197,8 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ ssm __SF_EMPTY(%r15) .endm + .section .kprobes.text, "ax" + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -206,30 +208,21 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ */ .globl __switch_to __switch_to: - tm __THREAD_per+4(%r3),0xe8 # is the new process using per ? - jz __switch_to_noper # if not we're fine - stctg %c9,%c11,__SF_EMPTY(%r15)# We are using per stuff - clc __THREAD_per(24,%r3),__SF_EMPTY(%r15) - je __switch_to_noper # we got away without bashing TLB's - lctlg %c9,%c11,__THREAD_per(%r3) # Nope we didn't -__switch_to_noper: - lg %r4,__THREAD_info(%r2) # get thread_info of prev + lg %r4,__THREAD_info(%r2) # get thread_info of prev + lg %r5,__THREAD_info(%r3) # get thread_info of next tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? - jz __switch_to_no_mcck - ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev - lg %r4,__THREAD_info(%r3) # get thread_info of next - oi __TI_flags+7(%r4),_TIF_MCCK_PENDING # set it in next -__switch_to_no_mcck: - stmg %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task - stg %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp - lg %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp - lmg %r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task - stg %r3,__LC_CURRENT # __LC_CURRENT = current task struct - lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - lg %r3,__THREAD_info(%r3) # load thread_info from task struct - stg %r3,__LC_THREAD_INFO - aghi %r3,STACK_SIZE - stg %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack + jz 0f + ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + oi __TI_flags+7(%r5),_TIF_MCCK_PENDING # set it in next +0: stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev + lg %r15,__THREAD_ksp(%r3) # load kernel stack of next + lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + stg %r3,__LC_CURRENT # store task struct of next + stg %r5,__LC_THREAD_INFO # store thread info of next + aghi %r5,STACK_SIZE # end of kernel stack of next + stg %r5,__LC_KERNEL_STACK # store end of kernel stack br %r14 __critical_start: @@ -309,7 +302,7 @@ sysc_work_tif: jo sysc_notify_resume tm __TI_flags+7(%r12),_TIF_RESTART_SVC jo sysc_restart - tm __TI_flags+7(%r12),_TIF_SINGLE_STEP + tm __TI_flags+7(%r12),_TIF_PER_TRAP jo sysc_singlestep j sysc_return # beware of critical section cleanup @@ -331,12 +324,12 @@ sysc_mcck_pending: # _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: - ni __TI_flags+7(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP + ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal tm __TI_flags+7(%r12),_TIF_RESTART_SVC jo sysc_restart - tm __TI_flags+7(%r12),_TIF_SINGLE_STEP + tm __TI_flags+7(%r12),_TIF_PER_TRAP jo sysc_singlestep j sysc_return @@ -361,14 +354,14 @@ sysc_restart: j sysc_nr_ok # restart svc # -# _TIF_SINGLE_STEP is set, call do_single_step +# _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP + ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return - jg do_single_step # branch to do_sigtrap + jg do_per_trap # # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before @@ -524,10 +517,10 @@ pgm_no_vtime2: lg %r1,__TI_task(%r12) tm SP_PSW+1(%r15),0x01 # kernel per event ? jz kernel_per - mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE + mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS + mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID + oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP lgf %r3,__LC_PGM_ILC # load program interruption code lg %r4,__LC_TRANS_EXC_CODE REENABLE_IRQS @@ -556,10 +549,10 @@ pgm_svcper: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK lg %r8,__TI_task(%r12) - mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID - mvc __THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS - mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE + mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS + mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID + oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc @@ -571,7 +564,7 @@ kernel_per: REENABLE_IRQS xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area - brasl %r14,do_single_step + brasl %r14,do_per_trap j pgm_exit /* @@ -868,6 +861,8 @@ restart_crash: restart_go: #endif + .section .kprobes.text, "ax" + #ifdef CONFIG_CHECK_STACK /* * The synchronous or the asynchronous stack overflowed. We are dead. diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 6a83d058131..78bdf0e5dff 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -4,7 +4,7 @@ * Copyright IBM Corp. 2009 * * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, - * + * Martin Schwidefsky <schwidefsky@de.ibm.com> */ #include <linux/hardirq.h> @@ -12,176 +12,144 @@ #include <linux/ftrace.h> #include <linux/kernel.h> #include <linux/types.h> +#include <linux/kprobes.h> #include <trace/syscall.h> #include <asm/asm-offsets.h> +#ifdef CONFIG_64BIT +#define MCOUNT_OFFSET_RET 12 +#else +#define MCOUNT_OFFSET_RET 22 +#endif + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_disable_code(void); -void ftrace_disable_return(void); -void ftrace_call_code(void); -void ftrace_nop_code(void); - -#define FTRACE_INSN_SIZE 4 +void ftrace_enable_insn(void); #ifdef CONFIG_64BIT - +/* + * The 64-bit mcount code looks like this: + * stg %r14,8(%r15) # offset 0 + * > larl %r1,<&counter> # offset 6 + * > brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 + * Total length is 24 bytes. The middle two instructions of the mcount + * block get overwritten by ftrace_make_nop / ftrace_make_call. + * The 64-bit enabled ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > lg %r1,__LC_FTRACE_FUNC # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The return points of the mcount/ftrace function have the same offset 18. + * The 64-bit disable ftrace code block looks like this: + * stg %r14,8(%r15) # offset 0 + * > jg .+18 # offset 6 + * > lgr %r0,%r0 # offset 12 + * > basr %r14,%r1 # offset 16 + * lg %r14,8(%15) # offset 18 + * The jg instruction branches to offset 24 to skip as many instructions + * as possible. + */ asm( " .align 4\n" "ftrace_disable_code:\n" - " j 0f\n" - " .word 0x0024\n" - " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n" - " basr %r14,%r1\n" - "ftrace_disable_return:\n" - " lg %r14,8(15)\n" + " jg 0f\n" " lgr %r0,%r0\n" - "0:\n"); - -asm( + " basr %r14,%r1\n" + "0:\n" " .align 4\n" - "ftrace_nop_code:\n" - " j .+"__stringify(MCOUNT_INSN_SIZE)"\n"); + "ftrace_enable_insn:\n" + " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); -asm( - " .align 4\n" - "ftrace_call_code:\n" - " stg %r14,8(%r15)\n"); +#define FTRACE_INSN_SIZE 6 #else /* CONFIG_64BIT */ - +/* + * The 31-bit mcount code looks like this: + * st %r14,4(%r15) # offset 0 + * > bras %r1,0f # offset 4 + * > .long _mcount # offset 8 + * > .long <&counter> # offset 12 + * > 0: l %r14,0(%r1) # offset 16 + * > l %r1,4(%r1) # offset 20 + * basr %r14,%r14 # offset 24 + * l %r14,4(%r15) # offset 26 + * Total length is 30 bytes. The twenty bytes starting from offset 4 + * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. + * The 31-bit enabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > l %r14,__LC_FTRACE_FUNC # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The return points of the mcount/ftrace function have the same offset 26. + * The 31-bit disabled ftrace code block looks like this: + * st %r14,4(%r15) # offset 0 + * > j .+26 # offset 4 + * > j 0f # offset 8 + * > .fill 12,1,0x07 # offset 12 + * 0: basr %r14,%r14 # offset 24 + * l %r14,4(%r14) # offset 26 + * The j instruction branches to offset 30 to skip as many instructions + * as possible. + */ asm( " .align 4\n" "ftrace_disable_code:\n" + " j 1f\n" " j 0f\n" - " l %r1,"__stringify(__LC_FTRACE_FUNC)"\n" - " basr %r14,%r1\n" - "ftrace_disable_return:\n" - " l %r14,4(%r15)\n" - " j 0f\n" - " bcr 0,%r7\n" - " bcr 0,%r7\n" - " bcr 0,%r7\n" - " bcr 0,%r7\n" - " bcr 0,%r7\n" - " bcr 0,%r7\n" - "0:\n"); - -asm( + " .fill 12,1,0x07\n" + "0: basr %r14,%r14\n" + "1:\n" " .align 4\n" - "ftrace_nop_code:\n" - " j .+"__stringify(MCOUNT_INSN_SIZE)"\n"); + "ftrace_enable_insn:\n" + " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); -asm( - " .align 4\n" - "ftrace_call_code:\n" - " st %r14,4(%r15)\n"); +#define FTRACE_INSN_SIZE 4 #endif /* CONFIG_64BIT */ -static int ftrace_modify_code(unsigned long ip, - void *old_code, int old_size, - void *new_code, int new_size) -{ - unsigned char replaced[MCOUNT_INSN_SIZE]; - - /* - * Note: Due to modules code can disappear and change. - * We need to protect against faulting as well as code - * changing. We do this by using the probe_kernel_* - * functions. - * This however is just a simple sanity check. - */ - if (probe_kernel_read(replaced, (void *)ip, old_size)) - return -EFAULT; - if (memcmp(replaced, old_code, old_size) != 0) - return -EINVAL; - if (probe_kernel_write((void *)ip, new_code, new_size)) - return -EPERM; - return 0; -} - -static int ftrace_make_initial_nop(struct module *mod, struct dyn_ftrace *rec, - unsigned long addr) -{ - return ftrace_modify_code(rec->ip, - ftrace_call_code, FTRACE_INSN_SIZE, - ftrace_disable_code, MCOUNT_INSN_SIZE); -} int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - if (addr == MCOUNT_ADDR) - return ftrace_make_initial_nop(mod, rec, addr); - return ftrace_modify_code(rec->ip, - ftrace_call_code, FTRACE_INSN_SIZE, - ftrace_nop_code, FTRACE_INSN_SIZE); + if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, + MCOUNT_INSN_SIZE)) + return -EPERM; + return 0; } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - return ftrace_modify_code(rec->ip, - ftrace_nop_code, FTRACE_INSN_SIZE, - ftrace_call_code, FTRACE_INSN_SIZE); + if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn, + FTRACE_INSN_SIZE)) + return -EPERM; + return 0; } int ftrace_update_ftrace_func(ftrace_func_t func) { - ftrace_dyn_func = (unsigned long)func; return 0; } int __init ftrace_dyn_arch_init(void *data) { - *(unsigned long *)data = 0; + *(unsigned long *) data = 0; return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -/* - * Patch the kernel code at ftrace_graph_caller location: - * The instruction there is branch relative on condition. The condition mask - * is either all ones (always branch aka disable ftrace_graph_caller) or all - * zeroes (nop aka enable ftrace_graph_caller). - * Instruction format for brc is a7m4xxxx where m is the condition mask. - */ -int ftrace_enable_ftrace_graph_caller(void) -{ - unsigned short opcode = 0xa704; - - return probe_kernel_write(ftrace_graph_caller, &opcode, sizeof(opcode)); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - unsigned short opcode = 0xa7f4; - - return probe_kernel_write(ftrace_graph_caller, &opcode, sizeof(opcode)); -} - -static inline unsigned long ftrace_mcount_call_adjust(unsigned long addr) -{ - return addr - (ftrace_disable_return - ftrace_disable_code); -} - -#else /* CONFIG_DYNAMIC_FTRACE */ - -static inline unsigned long ftrace_mcount_call_adjust(unsigned long addr) -{ - return addr - MCOUNT_OFFSET_RET; -} - -#endif /* CONFIG_DYNAMIC_FTRACE */ - /* * Hook the return address and push it in the stack of return addresses * in current thread info. */ -unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent) +unsigned long __kprobes prepare_ftrace_return(unsigned long parent, + unsigned long ip) { struct ftrace_graph_ent trace; @@ -189,14 +157,42 @@ unsigned long prepare_ftrace_return(unsigned long ip, unsigned long parent) goto out; if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) goto out; - trace.func = ftrace_mcount_call_adjust(ip) & PSW_ADDR_INSN; + trace.func = (ip & PSW_ADDR_INSN) - MCOUNT_OFFSET_RET; /* Only trace if the calling function expects to. */ if (!ftrace_graph_entry(&trace)) { current->curr_ret_stack--; goto out; } - parent = (unsigned long)return_to_handler; + parent = (unsigned long) return_to_handler; out: return parent; } + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Patch the kernel code at ftrace_graph_caller location. The instruction + * there is branch relative and save to prepare_ftrace_return. To disable + * the call to prepare_ftrace_return we patch the bras offset to point + * directly after the instructions. To enable the call we calculate + * the original offset to prepare_ftrace_return and put it back. + */ +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned short offset; + + offset = ((void *) prepare_ftrace_return - + (void *) ftrace_graph_caller) / 2; + return probe_kernel_write(ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + static unsigned short offset = 0x0002; + + return probe_kernel_write(ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 026a37a94fc..ea5099c9709 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -1,7 +1,5 @@ /* - * arch/s390/kernel/irq.c - * - * Copyright IBM Corp. 2004,2007 + * Copyright IBM Corp. 2004,2010 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Thomas Spatzier (tspat@de.ibm.com) * @@ -17,12 +15,42 @@ #include <linux/proc_fs.h> #include <linux/profile.h> +struct irq_class { + char *name; + char *desc; +}; + +static const struct irq_class intrclass_names[] = { + {.name = "EXT" }, + {.name = "I/O" }, + {.name = "CLK", .desc = "[EXT] Clock Comparator" }, + {.name = "IPI", .desc = "[EXT] Signal Processor" }, + {.name = "TMR", .desc = "[EXT] CPU Timer" }, + {.name = "TAL", .desc = "[EXT] Timing Alert" }, + {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, + {.name = "DSD", .desc = "[EXT] DASD Diag" }, + {.name = "VRT", .desc = "[EXT] Virtio" }, + {.name = "SCP", .desc = "[EXT] Service Call" }, + {.name = "IUC", .desc = "[EXT] IUCV" }, + {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, + {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, + {.name = "DAS", .desc = "[I/O] DASD" }, + {.name = "C15", .desc = "[I/O] 3215" }, + {.name = "C70", .desc = "[I/O] 3270" }, + {.name = "TAP", .desc = "[I/O] Tape" }, + {.name = "VMR", .desc = "[I/O] Unit Record Devices" }, + {.name = "LCS", .desc = "[I/O] LCS" }, + {.name = "CLW", .desc = "[I/O] CLAW" }, + {.name = "CTC", .desc = "[I/O] CTC" }, + {.name = "APB", .desc = "[I/O] AP Bus" }, + {.name = "NMI", .desc = "[NMI] Machine Check" }, +}; + /* * show_interrupts is needed by /proc/interrupts. */ int show_interrupts(struct seq_file *p, void *v) { - static const char *intrclass_names[] = { "EXT", "I/O", }; int i = *(loff_t *) v, j; get_online_cpus(); @@ -34,15 +62,16 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { - seq_printf(p, "%s: ", intrclass_names[i]); + seq_printf(p, "%s: ", intrclass_names[i].name); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif + if (intrclass_names[i].desc) + seq_printf(p, " %s", intrclass_names[i].desc); seq_putc(p, '\n'); - } put_online_cpus(); return 0; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 2564793ec2b..1d05d669107 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -32,34 +32,14 @@ #include <linux/slab.h> #include <linux/hardirq.h> -DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; +struct kretprobe_blackpoint kretprobe_blacklist[] = { }; -int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - /* Make sure the probe isn't going on a difficult instruction */ - if (is_prohibited_opcode((kprobe_opcode_t *) p->addr)) - return -EINVAL; - - if ((unsigned long)p->addr & 0x01) - return -EINVAL; - - /* Use the get_insn_slot() facility for correctness */ - if (!(p->ainsn.insn = get_insn_slot())) - return -ENOMEM; - - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - - get_instruction_type(&p->ainsn); - p->opcode = *p->addr; - return 0; -} - -int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction) +static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) { - switch (*(__u8 *) instruction) { + switch (insn[0] >> 8) { case 0x0c: /* bassm */ case 0x0b: /* bsm */ case 0x83: /* diag */ @@ -68,7 +48,7 @@ int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction) case 0xad: /* stosm */ return -EINVAL; } - switch (*(__u16 *) instruction) { + switch (insn[0]) { case 0x0101: /* pr */ case 0xb25a: /* bsa */ case 0xb240: /* bakr */ @@ -81,93 +61,92 @@ int __kprobes is_prohibited_opcode(kprobe_opcode_t *instruction) return 0; } -void __kprobes get_instruction_type(struct arch_specific_insn *ainsn) +static int __kprobes get_fixup_type(kprobe_opcode_t *insn) { /* default fixup method */ - ainsn->fixup = FIXUP_PSW_NORMAL; - - /* save r1 operand */ - ainsn->reg = (*ainsn->insn & 0xf0) >> 4; + int fixup = FIXUP_PSW_NORMAL; - /* save the instruction length (pop 5-5) in bytes */ - switch (*(__u8 *) (ainsn->insn) >> 6) { - case 0: - ainsn->ilen = 2; - break; - case 1: - case 2: - ainsn->ilen = 4; - break; - case 3: - ainsn->ilen = 6; - break; - } - - switch (*(__u8 *) ainsn->insn) { + switch (insn[0] >> 8) { case 0x05: /* balr */ case 0x0d: /* basr */ - ainsn->fixup = FIXUP_RETURN_REGISTER; + fixup = FIXUP_RETURN_REGISTER; /* if r2 = 0, no branch will be taken */ - if ((*ainsn->insn & 0x0f) == 0) - ainsn->fixup |= FIXUP_BRANCH_NOT_TAKEN; + if ((insn[0] & 0x0f) == 0) + fixup |= FIXUP_BRANCH_NOT_TAKEN; break; case 0x06: /* bctr */ case 0x07: /* bcr */ - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + fixup = FIXUP_BRANCH_NOT_TAKEN; break; case 0x45: /* bal */ case 0x4d: /* bas */ - ainsn->fixup = FIXUP_RETURN_REGISTER; + fixup = FIXUP_RETURN_REGISTER; break; case 0x47: /* bc */ case 0x46: /* bct */ case 0x86: /* bxh */ case 0x87: /* bxle */ - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; + fixup = FIXUP_BRANCH_NOT_TAKEN; break; case 0x82: /* lpsw */ - ainsn->fixup = FIXUP_NOT_REQUIRED; + fixup = FIXUP_NOT_REQUIRED; break; case 0xb2: /* lpswe */ - if (*(((__u8 *) ainsn->insn) + 1) == 0xb2) { - ainsn->fixup = FIXUP_NOT_REQUIRED; - } + if ((insn[0] & 0xff) == 0xb2) + fixup = FIXUP_NOT_REQUIRED; break; case 0xa7: /* bras */ - if ((*ainsn->insn & 0x0f) == 0x05) { - ainsn->fixup |= FIXUP_RETURN_REGISTER; - } + if ((insn[0] & 0x0f) == 0x05) + fixup |= FIXUP_RETURN_REGISTER; break; case 0xc0: - if ((*ainsn->insn & 0x0f) == 0x00 /* larl */ - || (*ainsn->insn & 0x0f) == 0x05) /* brasl */ - ainsn->fixup |= FIXUP_RETURN_REGISTER; + if ((insn[0] & 0x0f) == 0x00 || /* larl */ + (insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if (*(((__u8 *) ainsn->insn) + 5 ) == 0x44 || /* bxhg */ - *(((__u8 *) ainsn->insn) + 5) == 0x45) {/* bxleg */ - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; - } + if ((insn[2] & 0xff) == 0x44 || /* bxhg */ + (insn[2] & 0xff) == 0x45) /* bxleg */ + fixup = FIXUP_BRANCH_NOT_TAKEN; break; case 0xe3: /* bctg */ - if (*(((__u8 *) ainsn->insn) + 5) == 0x46) { - ainsn->fixup = FIXUP_BRANCH_NOT_TAKEN; - } + if ((insn[2] & 0xff) == 0x46) + fixup = FIXUP_BRANCH_NOT_TAKEN; break; } + return fixup; +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + if ((unsigned long) p->addr & 0x01) + return -EINVAL; + + /* Make sure the probe isn't going on a difficult instruction */ + if (is_prohibited_opcode(p->addr)) + return -EINVAL; + + p->opcode = *p->addr; + memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2); + + return 0; } +struct ins_replace_args { + kprobe_opcode_t *ptr; + kprobe_opcode_t opcode; +}; + static int __kprobes swap_instruction(void *aref) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long status = kcb->kprobe_status; struct ins_replace_args *args = aref; - int rc; kcb->kprobe_status = KPROBE_SWAP_INST; - rc = probe_kernel_write(args->ptr, &args->new, sizeof(args->new)); + probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode)); kcb->kprobe_status = status; - return rc; + return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) @@ -175,8 +154,7 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) struct ins_replace_args args; args.ptr = p->addr; - args.old = p->opcode; - args.new = BREAKPOINT_INSTRUCTION; + args.opcode = BREAKPOINT_INSTRUCTION; stop_machine(swap_instruction, &args, NULL); } @@ -185,64 +163,69 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) struct ins_replace_args args; args.ptr = p->addr; - args.old = BREAKPOINT_INSTRUCTION; - args.new = p->opcode; + args.opcode = p->opcode; stop_machine(swap_instruction, &args, NULL); } void __kprobes arch_remove_kprobe(struct kprobe *p) { - if (p->ainsn.insn) { - free_insn_slot(p->ainsn.insn, 0); - p->ainsn.insn = NULL; - } } -static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) { - per_cr_bits kprobe_per_regs[1]; + struct per_regs per_kprobe; - memset(kprobe_per_regs, 0, sizeof(per_cr_bits)); - regs->psw.addr = (unsigned long)p->ainsn.insn | PSW_ADDR_AMODE; + /* Set up the PER control registers %cr9-%cr11 */ + per_kprobe.control = PER_EVENT_IFETCH; + per_kprobe.start = ip; + per_kprobe.end = ip; - /* Set up the per control reg info, will pass to lctl */ - kprobe_per_regs[0].em_instruction_fetch = 1; - kprobe_per_regs[0].starting_addr = (unsigned long)p->ainsn.insn; - kprobe_per_regs[0].ending_addr = (unsigned long)p->ainsn.insn + 1; + /* Save control regs and psw mask */ + __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + kcb->kprobe_saved_imask = regs->psw.mask & + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); - /* Set the PER control regs, turns on single step for this address */ - __ctl_load(kprobe_per_regs, 9, 11); + /* Set PER control regs, turns on single step for the given address */ + __ctl_load(per_kprobe, 9, 11); regs->psw.mask |= PSW_MASK_PER; regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); + regs->psw.addr = ip | PSW_ADDR_AMODE; } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) { - kcb->prev_kprobe.kp = kprobe_running(); - kcb->prev_kprobe.status = kcb->kprobe_status; - kcb->prev_kprobe.kprobe_saved_imask = kcb->kprobe_saved_imask; - memcpy(kcb->prev_kprobe.kprobe_saved_ctl, kcb->kprobe_saved_ctl, - sizeof(kcb->kprobe_saved_ctl)); + /* Restore control regs and psw mask, set new psw address */ + __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + regs->psw.mask &= ~PSW_MASK_PER; + regs->psw.mask |= kcb->kprobe_saved_imask; + regs->psw.addr = ip | PSW_ADDR_AMODE; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +/* + * Activate a kprobe by storing its pointer to current_kprobe. The + * previous kprobe is stored in kcb->prev_kprobe. A stack of up to + * two kprobes can be active, see KPROBE_REENTER. + */ +static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; - kcb->kprobe_status = kcb->prev_kprobe.status; - kcb->kprobe_saved_imask = kcb->prev_kprobe.kprobe_saved_imask; - memcpy(kcb->kprobe_saved_ctl, kcb->prev_kprobe.kprobe_saved_ctl, - sizeof(kcb->kprobe_saved_ctl)); + kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); + kcb->prev_kprobe.status = kcb->kprobe_status; + __get_cpu_var(current_kprobe) = p; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +/* + * Deactivate a kprobe by backing up to the previous state. If the + * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL, + * for any other state prev_kprobe.kp will be NULL. + */ +static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; - /* Save the interrupt and per flags */ - kcb->kprobe_saved_imask = regs->psw.mask & - (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); - /* Save the control regs that govern PER */ - __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, @@ -251,79 +234,104 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14]; /* Replace the return addr with trampoline addr */ - regs->gprs[14] = (unsigned long)&kretprobe_trampoline; + regs->gprs[14] = (unsigned long) &kretprobe_trampoline; +} + +static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb, + struct kprobe *p) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + default: + /* + * A kprobe on the code path to single step an instruction + * is a BUG. The code path resides in the .kprobes.text + * section and is executed with interrupts disabled. + */ + printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + } } static int __kprobes kprobe_handler(struct pt_regs *regs) { - struct kprobe *p; - int ret = 0; - unsigned long *addr = (unsigned long *) - ((regs->psw.addr & PSW_ADDR_INSN) - 2); struct kprobe_ctlblk *kcb; + struct kprobe *p; /* - * We don't want to be preempted for the entire - * duration of kprobe processing + * We want to disable preemption for the entire duration of kprobe + * processing. That includes the calls to the pre/post handlers + * and single stepping the kprobe instruction. */ preempt_disable(); kcb = get_kprobe_ctlblk(); + p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2)); - /* Check we're not actually recursing */ - if (kprobe_running()) { - p = get_kprobe(addr); - if (p) { - if (kcb->kprobe_status == KPROBE_HIT_SS && - *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->psw.mask &= ~PSW_MASK_PER; - regs->psw.mask |= kcb->kprobe_saved_imask; - goto no_kprobe; - } - /* We have reentered the kprobe_handler(), since - * another probe was hit while within the handler. - * We here save the original kprobes variables and - * just single step on the instruction of the new probe - * without calling any user handlers. + if (p) { + if (kprobe_running()) { + /* + * We have hit a kprobe while another is still + * active. This can happen in the pre and post + * handler. Single step the instruction of the + * new probe but do not call any handler function + * of this secondary kprobe. + * push_kprobe and pop_kprobe saves and restores + * the currently active kprobe. */ - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kprobes_inc_nmissed_count(p); - prepare_singlestep(p, regs); + kprobe_reenter_check(kcb, p); + push_kprobe(kcb, p); kcb->kprobe_status = KPROBE_REENTER; - return 1; } else { - p = __get_cpu_var(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - goto ss_probe; - } + /* + * If we have no pre-handler or it returned 0, we + * continue with single stepping. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry + * for jprobe processing, so get out doing nothing + * more here. + */ + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler && p->pre_handler(p, regs)) + return 1; + kcb->kprobe_status = KPROBE_HIT_SS; } - goto no_kprobe; - } - - p = get_kprobe(addr); - if (!p) - /* - * No kprobe at this address. The fault has not been - * caused by a kprobe breakpoint. The race of breakpoint - * vs. kprobe remove does not exist because on s390 we - * use stop_machine to arm/disarm the breakpoints. - */ - goto no_kprobe; - - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - set_current_kprobe(p, regs, kcb); - if (p->pre_handler && p->pre_handler(p, regs)) - /* handler has already set things up, so skip ss setup */ + enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); return 1; - -ss_probe: - prepare_singlestep(p, regs); - kcb->kprobe_status = KPROBE_HIT_SS; - return 1; - -no_kprobe: + } else if (kprobe_running()) { + p = __get_cpu_var(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + /* + * Continuation after the jprobe completed and + * caused the jprobe_return trap. The jprobe + * break_handler "returns" to the original + * function that still has the kprobe breakpoint + * installed. We continue with single stepping. + */ + kcb->kprobe_status = KPROBE_HIT_SS; + enable_singlestep(kcb, regs, + (unsigned long) p->ainsn.insn); + return 1; + } /* else: + * No kprobe at this address and the current kprobe + * has no break handler (no jprobe!). The kernel just + * exploded, let the standard trap handler pick up the + * pieces. + */ + } /* else: + * No kprobe at this address and no active kprobe. The trap has + * not been caused by a kprobe breakpoint. The race of breakpoint + * vs. kprobe remove does not exist because on s390 as we use + * stop_machine to arm/disarm the breakpoints. + */ preempt_enable_no_resched(); - return ret; + return 0; } /* @@ -344,12 +352,12 @@ static void __used kretprobe_trampoline_holder(void) static int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - struct kretprobe_instance *ri = NULL; + struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; struct hlist_node *node, *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; - kprobe_opcode_t *correct_ret_addr = NULL; + unsigned long flags, orig_ret_address; + unsigned long trampoline_address; + kprobe_opcode_t *correct_ret_addr; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -367,12 +375,16 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * real return address, and all the rest will point to * kretprobe_trampoline */ + ri = NULL; + orig_ret_address = 0; + correct_ret_addr = NULL; + trampoline_address = (unsigned long) &kretprobe_trampoline; hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; - orig_ret_address = (unsigned long)ri->ret_addr; + orig_ret_address = (unsigned long) ri->ret_addr; if (orig_ret_address != trampoline_address) /* @@ -391,7 +403,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, /* another task is sharing our hash bucket */ continue; - orig_ret_address = (unsigned long)ri->ret_addr; + orig_ret_address = (unsigned long) ri->ret_addr; if (ri->rp && ri->rp->handler) { ri->ret_addr = correct_ret_addr; @@ -400,19 +412,18 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, recycle_rp_inst(ri, &empty_rp); - if (orig_ret_address != trampoline_address) { + if (orig_ret_address != trampoline_address) /* * This is the real return address. Any other * instances associated with this task are for * other calls deeper on the call stack */ break; - } } regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; - reset_current_kprobe(); + pop_kprobe(get_kprobe_ctlblk()); kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); @@ -439,55 +450,42 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; + int fixup = get_fixup_type(p->ainsn.insn); - regs->psw.addr &= PSW_ADDR_INSN; - - if (p->ainsn.fixup & FIXUP_PSW_NORMAL) - regs->psw.addr = (unsigned long)p->addr + - ((unsigned long)regs->psw.addr - - (unsigned long)p->ainsn.insn); + if (fixup & FIXUP_PSW_NORMAL) + ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; - if (p->ainsn.fixup & FIXUP_BRANCH_NOT_TAKEN) - if ((unsigned long)regs->psw.addr - - (unsigned long)p->ainsn.insn == p->ainsn.ilen) - regs->psw.addr = (unsigned long)p->addr + p->ainsn.ilen; + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = ((p->ainsn.insn[0] >> 14) + 3) & -2; + if (ip - (unsigned long) p->ainsn.insn == ilen) + ip = (unsigned long) p->addr + ilen; + } - if (p->ainsn.fixup & FIXUP_RETURN_REGISTER) - regs->gprs[p->ainsn.reg] = ((unsigned long)p->addr + - (regs->gprs[p->ainsn.reg] - - (unsigned long)p->ainsn.insn)) - | PSW_ADDR_AMODE; + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (p->ainsn.insn[0] & 0xf0) >> 4; + regs->gprs[reg] += (unsigned long) p->addr - + (unsigned long) p->ainsn.insn; + } - regs->psw.addr |= PSW_ADDR_AMODE; - /* turn off PER mode */ - regs->psw.mask &= ~PSW_MASK_PER; - /* Restore the original per control regs */ - __ctl_load(kcb->kprobe_saved_ctl, 9, 11); - regs->psw.mask |= kcb->kprobe_saved_imask; + disable_singlestep(kcb, regs, ip); } static int __kprobes post_kprobe_handler(struct pt_regs *regs) { - struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); - if (!cur) + if (!p) return 0; - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); + p->post_handler(p, regs, 0); } - resume_execution(cur, regs); - - /*Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); -out: + resume_execution(p, regs); + pop_kprobe(kcb); preempt_enable_no_resched(); /* @@ -495,17 +493,16 @@ out: * will have PER set, in which case, continue the remaining processing * of do_single_step, as if this is not a probe hit. */ - if (regs->psw.mask & PSW_MASK_PER) { + if (regs->psw.mask & PSW_MASK_PER) return 0; - } return 1; } static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) { - struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); const struct exception_table_entry *entry; switch(kcb->kprobe_status) { @@ -521,14 +518,8 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) * and allow the page fault handler to continue as a * normal page fault. */ - regs->psw.addr = (unsigned long)cur->addr | PSW_ADDR_AMODE; - regs->psw.mask &= ~PSW_MASK_PER; - regs->psw.mask |= kcb->kprobe_saved_imask; - if (kcb->kprobe_status == KPROBE_REENTER) - restore_previous_kprobe(kcb); - else { - reset_current_kprobe(); - } + disable_singlestep(kcb, regs, (unsigned long) p->addr); + pop_kprobe(kcb); preempt_enable_no_resched(); break; case KPROBE_HIT_ACTIVE: @@ -538,7 +529,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) * we can also use npre/npostfault count for accouting * these specific fault cases. */ - kprobes_inc_nmissed_count(cur); + kprobes_inc_nmissed_count(p); /* * We come here because instructions in the pre/post @@ -547,7 +538,7 @@ static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) * copy_from_user(), get_user() etc. Let the * user-specified handler try to fix it first. */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + if (p->fault_handler && p->fault_handler(p, regs, trapnr)) return 1; /* @@ -589,7 +580,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { - struct die_args *args = (struct die_args *)data; + struct die_args *args = (struct die_args *) data; struct pt_regs *regs = args->regs; int ret = NOTIFY_DONE; @@ -598,16 +589,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, switch (val) { case DIE_BPT: - if (kprobe_handler(args->regs)) + if (kprobe_handler(regs)) ret = NOTIFY_STOP; break; case DIE_SSTEP: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(regs)) ret = NOTIFY_STOP; break; case DIE_TRAP: if (!preemptible() && kprobe_running() && - kprobe_trap_handler(args->regs, args->trapnr)) + kprobe_trap_handler(regs, args->trapnr)) ret = NOTIFY_STOP; break; default: @@ -623,23 +614,19 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr; struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long stack; memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); /* setup return addr to the jprobe handler routine */ - regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE; regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); - /* r14 is the function return address */ - kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14]; /* r15 is the stack pointer */ - kcb->jprobe_saved_r15 = (unsigned long)regs->gprs[15]; - addr = (unsigned long)kcb->jprobe_saved_r15; + stack = (unsigned long) regs->gprs[15]; - memcpy(kcb->jprobes_stack, (kprobe_opcode_t *) addr, - MIN_STACK_SIZE(addr)); + memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack)); return 1; } @@ -656,30 +643,29 @@ void __kprobes jprobe_return_end(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_r15); + unsigned long stack; + + stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15]; /* Put the regs back */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); /* put the stack back */ - memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, - MIN_STACK_SIZE(stack_addr)); + memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack)); preempt_enable_no_resched(); return 1; } -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) & kretprobe_trampoline, +static struct kprobe trampoline = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline_p); + return register_kprobe(&trampoline); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *) & kretprobe_trampoline) - return 1; - return 0; + return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline; } diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index dfe015d7398..1e6a5579562 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -7,6 +7,8 @@ #include <asm/asm-offsets.h> + .section .kprobes.text, "ax" + .globl ftrace_stub ftrace_stub: br %r14 @@ -16,22 +18,12 @@ _mcount: #ifdef CONFIG_DYNAMIC_FTRACE br %r14 - .data - .globl ftrace_dyn_func -ftrace_dyn_func: - .long ftrace_stub - .previous - .globl ftrace_caller ftrace_caller: #endif stm %r2,%r5,16(%r15) bras %r1,2f -#ifdef CONFIG_DYNAMIC_FTRACE -0: .long ftrace_dyn_func -#else 0: .long ftrace_trace_function -#endif 1: .long function_trace_stop 2: l %r2,1b-0b(%r1) icm %r2,0xf,0(%r2) @@ -47,21 +39,15 @@ ftrace_caller: l %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE + l %r2,100(%r15) + l %r3,152(%r15) .globl ftrace_graph_caller ftrace_graph_caller: - # This unconditional branch gets runtime patched. Change only if - # you know what you are doing. See ftrace_enable_graph_caller(). - j 1f -#endif - bras %r1,0f - .long prepare_ftrace_return -0: l %r2,152(%r15) - l %r4,0(%r1) - l %r3,100(%r15) - basr %r14,%r4 - st %r2,100(%r15) -1: +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: st %r2,100(%r15) #endif ahi %r15,96 l %r14,56(%r15) diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index c37211c6092..e73667286ac 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -7,6 +7,8 @@ #include <asm/asm-offsets.h> + .section .kprobes.text, "ax" + .globl ftrace_stub ftrace_stub: br %r14 @@ -16,12 +18,6 @@ _mcount: #ifdef CONFIG_DYNAMIC_FTRACE br %r14 - .data - .globl ftrace_dyn_func -ftrace_dyn_func: - .quad ftrace_stub - .previous - .globl ftrace_caller ftrace_caller: #endif @@ -35,26 +31,19 @@ ftrace_caller: stg %r1,__SF_BACKCHAIN(%r15) lgr %r2,%r14 lg %r3,168(%r15) -#ifdef CONFIG_DYNAMIC_FTRACE - larl %r14,ftrace_dyn_func -#else larl %r14,ftrace_trace_function -#endif lg %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE + lg %r2,168(%r15) + lg %r3,272(%r15) .globl ftrace_graph_caller ftrace_graph_caller: - # This unconditional branch gets runtime patched. Change only if - # you know what you are doing. See ftrace_enable_graph_caller(). - j 0f -#endif - lg %r2,272(%r15) - lg %r3,168(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,168(%r15) -0: +# The bras instruction gets runtime patched to call prepare_ftrace_return. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# bras %r14,prepare_ftrace_return + bras %r14,0f +0: stg %r2,168(%r15) #endif aghi %r15,160 lmg %r2,%r5,32(%r15) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 1995c1712fc..fab88431a06 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -8,6 +8,7 @@ * Heiko Carstens <heiko.carstens@de.ibm.com>, */ +#include <linux/kernel_stat.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/hardirq.h> @@ -255,7 +256,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) nmi_enter(); s390_idle_check(regs, S390_lowcore.mcck_clock, S390_lowcore.mcck_enter_timer); - + kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++; mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); umode = user_mode(regs); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ec2e03b22ea..6ba42222b54 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/kernel_stat.h> #include <linux/syscalls.h> #include <linux/compat.h> +#include <linux/kprobes.h> #include <asm/compat.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -41,6 +42,7 @@ #include <asm/irq.h> #include <asm/timer.h> #include <asm/nmi.h> +#include <asm/smp.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -75,13 +77,8 @@ unsigned long thread_saved_pc(struct task_struct *tsk) */ static void default_idle(void) { - /* CPU is going idle. */ -#ifdef CONFIG_HOTPLUG_CPU - if (cpu_is_offline(smp_processor_id())) { - preempt_enable_no_resched(); + if (cpu_is_offline(smp_processor_id())) cpu_die(); - } -#endif local_irq_disable(); if (need_resched()) { local_irq_enable(); @@ -116,15 +113,17 @@ void cpu_idle(void) } } -extern void kernel_thread_starter(void); +extern void __kprobes kernel_thread_starter(void); asm( - ".align 4\n" + ".section .kprobes.text, \"ax\"\n" + ".global kernel_thread_starter\n" "kernel_thread_starter:\n" " la 2,0(10)\n" " basr 14,9\n" " la 2,0\n" - " br 11\n"); + " br 11\n" + ".previous\n"); int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { @@ -214,8 +213,10 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* start new process with ar4 pointing to the correct address space */ p->thread.mm_segment = get_fs(); /* Don't copy debug registers */ - memset(&p->thread.per_info, 0, sizeof(p->thread.per_info)); + memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); + memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + clear_tsk_thread_flag(p, TIF_PER_TRAP); /* Initialize per thread user and system timer values */ ti = task_thread_info(p); ti->user_timer = 0; diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 644548e615c..311e9d71288 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -13,7 +13,7 @@ #include <linux/smp.h> #include <linux/seq_file.h> #include <linux/delay.h> - +#include <linux/cpu.h> #include <asm/elf.h> #include <asm/lowcore.h> #include <asm/param.h> @@ -35,17 +35,6 @@ void __cpuinit cpu_init(void) } /* - * print_cpu_info - print basic information about a cpu - */ -void __cpuinit print_cpu_info(void) -{ - struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); - - pr_info("Processor %d started, address %d, identification %06X\n", - S390_lowcore.cpu_nr, stap(), id->ident); -} - -/* * show_cpuinfo - Get information on one CPU for use by procfs. */ static int show_cpuinfo(struct seq_file *m, void *v) @@ -57,9 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned long n = (unsigned long) v - 1; int i; - s390_adjust_jiffies(); - preempt_disable(); if (!n) { + s390_adjust_jiffies(); seq_printf(m, "vendor_id : IBM/S390\n" "# processors : %i\n" "bogomips per cpu: %lu.%02lu\n", @@ -71,7 +59,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "%s ", hwcap_str[i]); seq_puts(m, "\n"); } - + get_online_cpus(); if (cpu_online(n)) { struct cpuid *id = &per_cpu(cpu_id, n); seq_printf(m, "processor %li: " @@ -80,7 +68,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) "machine = %04X\n", n, id->version, id->ident, id->machine); } - preempt_enable(); + put_online_cpus(); return 0; } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 019bb714db4..ef86ad24398 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1,25 +1,9 @@ /* - * arch/s390/kernel/ptrace.c + * Ptrace user space interface. * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Copyright IBM Corp. 1999,2010 + * Author(s): Denis Joseph Barrow * Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * Based on PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Derived from "arch/m68k/kernel/ptrace.c" - * Copyright (C) 1994 by Hamish Macdonald - * Taken from linux/kernel/ptrace.c and modified for M680x0. - * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds - * - * Modified by Cort Dougan (cort@cs.nmt.edu) - * - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/kernel.h> @@ -61,76 +45,58 @@ enum s390_regset { REGSET_GENERAL_EXTENDED, }; -static void -FixPerRegisters(struct task_struct *task) +void update_per_regs(struct task_struct *task) { - struct pt_regs *regs; - per_struct *per_info; - per_cr_words cr_words; - - regs = task_pt_regs(task); - per_info = (per_struct *) &task->thread.per_info; - per_info->control_regs.bits.em_instruction_fetch = - per_info->single_step | per_info->instruction_fetch; - - if (per_info->single_step) { - per_info->control_regs.bits.starting_addr = 0; -#ifdef CONFIG_COMPAT - if (is_compat_task()) - per_info->control_regs.bits.ending_addr = 0x7fffffffUL; - else -#endif - per_info->control_regs.bits.ending_addr = PSW_ADDR_INSN; - } else { - per_info->control_regs.bits.starting_addr = - per_info->starting_addr; - per_info->control_regs.bits.ending_addr = - per_info->ending_addr; - } - /* - * if any of the control reg tracing bits are on - * we switch on per in the psw - */ - if (per_info->control_regs.words.cr[0] & PER_EM_MASK) - regs->psw.mask |= PSW_MASK_PER; - else + static const struct per_regs per_single_step = { + .control = PER_EVENT_IFETCH, + .start = 0, + .end = PSW_ADDR_INSN, + }; + struct pt_regs *regs = task_pt_regs(task); + struct thread_struct *thread = &task->thread; + const struct per_regs *new; + struct per_regs old; + + /* TIF_SINGLE_STEP overrides the user specified PER registers. */ + new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ? + &per_single_step : &thread->per_user; + + /* Take care of the PER enablement bit in the PSW. */ + if (!(new->control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; - - if (per_info->control_regs.bits.em_storage_alteration) - per_info->control_regs.bits.storage_alt_space_ctl = 1; - else - per_info->control_regs.bits.storage_alt_space_ctl = 0; - - if (task == current) { - __ctl_store(cr_words, 9, 11); - if (memcmp(&cr_words, &per_info->control_regs.words, - sizeof(cr_words)) != 0) - __ctl_load(per_info->control_regs.words, 9, 11); + return; } + regs->psw.mask |= PSW_MASK_PER; + __ctl_store(old, 9, 11); + if (memcmp(new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(*new, 9, 11); } void user_enable_single_step(struct task_struct *task) { - task->thread.per_info.single_step = 1; - FixPerRegisters(task); + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + if (task == current) + update_per_regs(task); } void user_disable_single_step(struct task_struct *task) { - task->thread.per_info.single_step = 0; - FixPerRegisters(task); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); + if (task == current) + update_per_regs(task); } /* * Called by kernel/ptrace.c when detaching.. * - * Make sure single step bits etc are not set. + * Clear all debugging related fields. */ -void -ptrace_disable(struct task_struct *child) +void ptrace_disable(struct task_struct *task) { - /* make sure the single step bit is not set. */ - user_disable_single_step(child); + memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); + memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); + clear_tsk_thread_flag(task, TIF_PER_TRAP); } #ifndef CONFIG_64BIT @@ -139,6 +105,47 @@ ptrace_disable(struct task_struct *child) # define __ADDR_MASK 7 #endif +static inline unsigned long __peek_user_per(struct task_struct *child, + addr_t addr) +{ + struct per_struct_kernel *dummy = NULL; + + if (addr == (addr_t) &dummy->cr9) + /* Control bits of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy->cr10) + /* Start address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy->bits) + /* Single-step bit. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + (1UL << (BITS_PER_LONG - 1)) : 0; + else if (addr == (addr_t) &dummy->starting_addr) + /* Start address of the user specified per set. */ + return child->thread.per_user.start; + else if (addr == (addr_t) &dummy->ending_addr) + /* End address of the user specified per set. */ + return child->thread.per_user.end; + else if (addr == (addr_t) &dummy->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (unsigned long) + child->thread.per_event.cause << (BITS_PER_LONG - 16); + else if (addr == (addr_t) &dummy->address) + /* Address of the last PER trap */ + return child->thread.per_event.address; + else if (addr == (addr_t) &dummy->access_id) + /* Access id of the last PER trap */ + return (unsigned long) + child->thread.per_event.paid << (BITS_PER_LONG - 8); + return 0; +} + /* * Read the word at offset addr from the user area of a process. The * trouble here is that the information is littered over different @@ -204,10 +211,10 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy->regs.per_info; - tmp = *(addr_t *)((addr_t) &child->thread.per_info + offset); + addr -= (addr_t) &dummy->regs.per_info; + tmp = __peek_user_per(child, addr); } else tmp = 0; @@ -237,6 +244,35 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) return put_user(tmp, (addr_t __user *) data); } +static inline void __poke_user_per(struct task_struct *child, + addr_t addr, addr_t data) +{ + struct per_struct_kernel *dummy = NULL; + + /* + * There are only three fields in the per_info struct that the + * debugger user can write to. + * 1) cr9: the debugger wants to set a new PER event mask + * 2) starting_addr: the debugger wants to set a new starting + * address to use with the PER event mask. + * 3) ending_addr: the debugger wants to set a new ending + * address to use with the PER event mask. + * The user specified PER event mask and the start and end + * addresses are used only if single stepping is not in effect. + * Writes to any other field in per_info are ignored. + */ + if (addr == (addr_t) &dummy->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -311,19 +347,17 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy->regs.per_info; - *(addr_t *)((addr_t) &child->thread.per_info + offset) = data; + addr -= (addr_t) &dummy->regs.per_info; + __poke_user_per(child, addr, data); } - FixPerRegisters(child); return 0; } -static int -poke_user(struct task_struct *child, addr_t addr, addr_t data) +static int poke_user(struct task_struct *child, addr_t addr, addr_t data) { addr_t mask; @@ -410,12 +444,53 @@ long arch_ptrace(struct task_struct *child, long request, */ /* + * Same as peek_user_per but for a 31 bit program. + */ +static inline __u32 __peek_user_per_compat(struct task_struct *child, + addr_t addr) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* Control bits of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == (addr_t) &dummy32->cr10) + /* Start address of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->cr11) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW32_ADDR_INSN : child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->bits) + /* Single-step bit. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0x80000000 : 0; + else if (addr == (addr_t) &dummy32->starting_addr) + /* Start address of the user specified per set. */ + return (__u32) child->thread.per_user.start; + else if (addr == (addr_t) &dummy32->ending_addr) + /* End address of the user specified per set. */ + return (__u32) child->thread.per_user.end; + else if (addr == (addr_t) &dummy32->perc_atmid) + /* PER code, ATMID and AI of the last PER trap */ + return (__u32) child->thread.per_event.cause << 16; + else if (addr == (addr_t) &dummy32->address) + /* Address of the last PER trap */ + return (__u32) child->thread.per_event.address; + else if (addr == (addr_t) &dummy32->access_id) + /* Access id of the last PER trap */ + return (__u32) child->thread.per_event.paid << 24; + return 0; +} + +/* * Same as peek_user but for a 31 bit program. */ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) { - struct user32 *dummy32 = NULL; - per_struct32 *dummy_per32 = NULL; + struct compat_user *dummy32 = NULL; addr_t offset; __u32 tmp; @@ -465,19 +540,10 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* - * per_info is found in the thread structure + * Handle access to the per_info structure. */ - offset = addr - (addr_t) &dummy32->regs.per_info; - /* This is magic. See per_struct and per_struct32. */ - if ((offset >= (addr_t) &dummy_per32->control_regs && - offset < (addr_t) (&dummy_per32->control_regs + 1)) || - (offset >= (addr_t) &dummy_per32->starting_addr && - offset <= (addr_t) &dummy_per32->ending_addr) || - offset == (addr_t) &dummy_per32->lowcore.words.address) - offset = offset*2 + 4; - else - offset = offset*2; - tmp = *(__u32 *)((addr_t) &child->thread.per_info + offset); + addr -= (addr_t) &dummy32->regs.per_info; + tmp = __peek_user_per_compat(child, addr); } else tmp = 0; @@ -498,13 +564,32 @@ static int peek_user_compat(struct task_struct *child, } /* + * Same as poke_user_per but for a 31 bit program. + */ +static inline void __poke_user_per_compat(struct task_struct *child, + addr_t addr, __u32 data) +{ + struct compat_per_struct_kernel *dummy32 = NULL; + + if (addr == (addr_t) &dummy32->cr9) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == (addr_t) &dummy32->starting_addr) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == (addr_t) &dummy32->ending_addr) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* * Same as poke_user but for a 31 bit program. */ static int __poke_user_compat(struct task_struct *child, addr_t addr, addr_t data) { - struct user32 *dummy32 = NULL; - per_struct32 *dummy_per32 = NULL; + struct compat_user *dummy32 = NULL; __u32 tmp = (__u32) data; addr_t offset; @@ -561,37 +646,20 @@ static int __poke_user_compat(struct task_struct *child, } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* - * per_info is found in the thread structure. - */ - offset = addr - (addr_t) &dummy32->regs.per_info; - /* - * This is magic. See per_struct and per_struct32. - * By incident the offsets in per_struct are exactly - * twice the offsets in per_struct32 for all fields. - * The 8 byte fields need special handling though, - * because the second half (bytes 4-7) is needed and - * not the first half. + * Handle access to the per_info structure. */ - if ((offset >= (addr_t) &dummy_per32->control_regs && - offset < (addr_t) (&dummy_per32->control_regs + 1)) || - (offset >= (addr_t) &dummy_per32->starting_addr && - offset <= (addr_t) &dummy_per32->ending_addr) || - offset == (addr_t) &dummy_per32->lowcore.words.address) - offset = offset*2 + 4; - else - offset = offset*2; - *(__u32 *)((addr_t) &child->thread.per_info + offset) = tmp; - + addr -= (addr_t) &dummy32->regs.per_info; + __poke_user_per_compat(child, addr, data); } - FixPerRegisters(child); return 0; } static int poke_user_compat(struct task_struct *child, addr_t addr, addr_t data) { - if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user32) - 3) + if (!is_compat_task() || (addr & 3) || + addr > sizeof(struct compat_user) - 3) return -EIO; return __poke_user_compat(child, addr, data); @@ -602,7 +670,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, { unsigned long addr = caddr; unsigned long data = cdata; - ptrace_area_emu31 parea; + compat_ptrace_area parea; int copied, ret; switch (request) { diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index bd1db508e8a..185029919c4 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -1,33 +1,36 @@ /* - * arch/s390/kernel/s390_ext.c - * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) + * Copyright IBM Corp. 1999,2010 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/kernel_stat.h> +#include <linux/interrupt.h> #include <linux/module.h> #include <linux/kernel.h> -#include <linux/slab.h> #include <linux/ftrace.h> #include <linux/errno.h> -#include <linux/kernel_stat.h> -#include <linux/interrupt.h> -#include <asm/cputime.h> -#include <asm/lowcore.h> +#include <linux/slab.h> #include <asm/s390_ext.h> #include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/lowcore.h> #include <asm/irq.h> #include "entry.h" +struct ext_int_info { + struct ext_int_info *next; + ext_int_handler_t handler; + __u16 code; +}; + /* * ext_int_hash[index] is the start of the list for all external interrupts * that hash to this index. With the current set of external interrupts * (0x1202 external call, 0x1004 cpu timer, 0x2401 hwc console, 0x4000 * iucv and 0x2603 pfault) this is always the first element. */ -ext_int_info_t *ext_int_hash[256] = { NULL, }; +static struct ext_int_info *ext_int_hash[256]; static inline int ext_hash(__u16 code) { @@ -36,90 +39,53 @@ static inline int ext_hash(__u16 code) int register_external_interrupt(__u16 code, ext_int_handler_t handler) { - ext_int_info_t *p; - int index; - - p = kmalloc(sizeof(ext_int_info_t), GFP_ATOMIC); - if (p == NULL) - return -ENOMEM; - p->code = code; - p->handler = handler; - index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; -} - -int register_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *p) -{ - int index; + struct ext_int_info *p; + int index; - if (p == NULL) - return -EINVAL; - p->code = code; - p->handler = handler; + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return -ENOMEM; + p->code = code; + p->handler = handler; index = ext_hash(code); - p->next = ext_int_hash[index]; - ext_int_hash[index] = p; - return 0; + p->next = ext_int_hash[index]; + ext_int_hash[index] = p; + return 0; } +EXPORT_SYMBOL(register_external_interrupt); int unregister_external_interrupt(__u16 code, ext_int_handler_t handler) { - ext_int_info_t *p, *q; - int index; - - index = ext_hash(code); - q = NULL; - p = ext_int_hash[index]; - while (p != NULL) { - if (p->code == code && p->handler == handler) - break; - q = p; - p = p->next; - } - if (p == NULL) - return -ENOENT; - if (q != NULL) - q->next = p->next; - else - ext_int_hash[index] = p->next; - kfree(p); - return 0; -} - -int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler, - ext_int_info_t *p) -{ - ext_int_info_t *q; + struct ext_int_info *p, *q; int index; - if (p == NULL || p->code != code || p->handler != handler) - return -EINVAL; index = ext_hash(code); - q = ext_int_hash[index]; - if (p != q) { - while (q != NULL) { - if (q->next == p) - break; - q = q->next; - } - if (q == NULL) - return -ENOENT; + q = NULL; + p = ext_int_hash[index]; + while (p) { + if (p->code == code && p->handler == handler) + break; + q = p; + p = p->next; + } + if (!p) + return -ENOENT; + if (q) q->next = p->next; - } else + else ext_int_hash[index] = p->next; + kfree(p); return 0; } +EXPORT_SYMBOL(unregister_external_interrupt); void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, unsigned int param32, unsigned long param64) { struct pt_regs *old_regs; unsigned short code; - ext_int_info_t *p; - int index; + struct ext_int_info *p; + int index; code = (unsigned short) ext_int_code; old_regs = set_irq_regs(regs); @@ -132,7 +98,7 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; if (code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; - index = ext_hash(code); + index = ext_hash(code); for (p = ext_int_hash[index]; p; p = p->next) { if (likely(p->code == code)) p->handler(ext_int_code, param32, param64); @@ -140,6 +106,3 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, irq_exit(); set_irq_regs(old_regs); } - -EXPORT_SYMBOL(register_external_interrupt); -EXPORT_SYMBOL(unregister_external_interrupt); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index ee7ac8b1178..abbb3c3c7aa 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -505,7 +505,7 @@ void do_signal(struct pt_regs *regs) * Let tracing know that we've done the handler setup. */ tracehook_signal_handler(signr, &info, &ka, regs, - current->thread.per_info.single_step); + test_thread_flag(TIF_SINGLE_STEP)); } return; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 94cf510b8fe..63a97db83f9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -23,6 +23,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/workqueue.h> #include <linux/module.h> #include <linux/init.h> #include <linux/mm.h> @@ -161,6 +162,7 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, { unsigned long bits; + kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; /* * handle bit signal external calls * @@ -469,25 +471,25 @@ int __cpuinit start_secondary(void *cpuvoid) ipi_call_unlock(); /* Switch on interrupts */ local_irq_enable(); - /* Print info about this processor */ - print_cpu_info(); /* cpu_idle will call schedule for us */ cpu_idle(); return 0; } -static void __init smp_create_idle(unsigned int cpu) +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit smp_fork_idle(struct work_struct *work) { - struct task_struct *p; + struct create_idle *c_idle; - /* - * don't care about the psw and regs settings since we'll never - * reschedule the forked task. - */ - p = fork_idle(cpu); - if (IS_ERR(p)) - panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); - current_set[cpu] = p; + c_idle = container_of(work, struct create_idle, work); + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); } static int __cpuinit smp_alloc_lowcore(int cpu) @@ -551,6 +553,7 @@ static void smp_free_lowcore(int cpu) int __cpuinit __cpu_up(unsigned int cpu) { struct _lowcore *cpu_lowcore; + struct create_idle c_idle; struct task_struct *idle; struct stack_frame *sf; u32 lowcore; @@ -558,6 +561,19 @@ int __cpuinit __cpu_up(unsigned int cpu) if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED) return -EIO; + idle = current_set[cpu]; + if (!idle) { + c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done); + INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle); + c_idle.cpu = cpu; + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + if (IS_ERR(c_idle.idle)) + return PTR_ERR(c_idle.idle); + idle = c_idle.idle; + current_set[cpu] = c_idle.idle; + } + init_idle(idle, cpu); if (smp_alloc_lowcore(cpu)) return -ENOMEM; do { @@ -572,7 +588,6 @@ int __cpuinit __cpu_up(unsigned int cpu) while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy) udelay(10); - idle = current_set[cpu]; cpu_lowcore = lowcore_ptr[cpu]; cpu_lowcore->kernel_stack = (unsigned long) task_stack_page(idle) + THREAD_SIZE; @@ -664,7 +679,6 @@ void __cpu_die(unsigned int cpu) udelay(10); smp_free_lowcore(cpu); atomic_dec(&init_mm.context.attach_count); - pr_info("Processor %d stopped\n", cpu); } void cpu_die(void) @@ -684,14 +698,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #endif unsigned long async_stack, panic_stack; struct _lowcore *lowcore; - unsigned int cpu; smp_detect_cpus(); /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); - print_cpu_info(); /* Reallocate current lowcore, but keep its contents. */ lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -719,9 +731,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore)) BUG(); #endif - for_each_possible_cpu(cpu) - if (cpu != smp_processor_id()) - smp_create_idle(cpu); } void __init smp_prepare_boot_cpu(void) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index f754a6dc4f9..9e7b039458d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -15,6 +15,7 @@ #define KMSG_COMPONENT "time" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/kernel_stat.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/sched.h> @@ -37,6 +38,7 @@ #include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/gfp.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/delay.h> #include <asm/s390_ext.h> @@ -60,7 +62,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); /* * Scheduler clock - returns current time in nanosec units. */ -unsigned long long notrace sched_clock(void) +unsigned long long notrace __kprobes sched_clock(void) { return (get_clock_monotonic() * 125) >> 9; } @@ -159,6 +161,7 @@ static void clock_comparator_interrupt(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { + kstat_cpu(smp_processor_id()).irqs[EXTINT_CLK]++; if (S390_lowcore.clock_comparator == -1ULL) set_clock_comparator(S390_lowcore.clock_comparator); } @@ -169,6 +172,7 @@ static void stp_timing_alert(struct stp_irq_parm *); static void timing_alert_interrupt(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { + kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++; if (param32 & 0x00c40000) etr_timing_alert((struct etr_irq_parm *) ¶m32); if (param32 & 0x00038000) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 70640822621..5eb78dd584c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -365,12 +365,10 @@ static inline void __user *get_psw_address(struct pt_regs *regs, ((regs->psw.addr - (pgm_int_code >> 16)) & PSW_ADDR_INSN); } -void __kprobes do_single_step(struct pt_regs *regs) +void __kprobes do_per_trap(struct pt_regs *regs) { - if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, - SIGTRAP) == NOTIFY_STOP){ + if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; - } if (tracehook_consider_fatal_signal(current, SIGTRAP)) force_sig(SIGTRAP, current); } @@ -451,8 +449,8 @@ static inline void do_fp_trap(struct pt_regs *regs, void __user *location, "floating point exception", regs, &si); } -static void illegal_op(struct pt_regs *regs, long pgm_int_code, - unsigned long trans_exc_code) +static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { siginfo_t info; __u8 opcode[6]; @@ -688,7 +686,7 @@ static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info); } -asmlinkage void kernel_stack_overflow(struct pt_regs * regs) +asmlinkage void __kprobes kernel_stack_overflow(struct pt_regs * regs) { bust_spinlocks(1); printk("Kernel stack overflow.\n"); @@ -733,5 +731,6 @@ void __init trap_init(void) pgm_check_table[0x15] = &operand_exception; pgm_check_table[0x1C] = &space_switch_exception; pgm_check_table[0x1D] = &hfp_sqrt_exception; - pfault_irq_init(); + /* Enable machine checks early. */ + local_mcck_enable(); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 56c8687b29b..1ccdf4d8aa8 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -19,6 +19,8 @@ #include <linux/kernel_stat.h> #include <linux/rcupdate.h> #include <linux/posix-timers.h> +#include <linux/cpu.h> +#include <linux/kprobes.h> #include <asm/s390_ext.h> #include <asm/timer.h> @@ -121,7 +123,7 @@ void account_system_vtime(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(account_system_vtime); -void vtime_start_cpu(__u64 int_clock, __u64 enter_timer) +void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); @@ -161,7 +163,7 @@ void vtime_start_cpu(__u64 int_clock, __u64 enter_timer) idle->sequence++; } -void vtime_stop_cpu(void) +void __kprobes vtime_stop_cpu(void) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); @@ -322,6 +324,7 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code, struct list_head cb_list; /* the callback queue */ __u64 elapsed, next; + kstat_cpu(smp_processor_id()).irqs[EXTINT_TMR]++; INIT_LIST_HEAD(&cb_list); vq = &__get_cpu_var(virt_cpu_timer); @@ -566,6 +569,23 @@ void init_cpu_vtimer(void) __ctl_set_bit(0,10); } +static int __cpuinit s390_nohz_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct s390_idle_data *idle; + long cpu = (long) hcpu; + + idle = &per_cpu(s390_idle, cpu); + switch (action) { + case CPU_DYING: + case CPU_DYING_FROZEN: + idle->nohz_delay = 0; + default: + break; + } + return NOTIFY_OK; +} + void __init vtime_init(void) { /* request the cpu timer external interrupt */ @@ -574,5 +594,6 @@ void __init vtime_init(void) /* Enable cpu timer interrupts on the boot cpu. */ init_cpu_vtimer(); + cpu_notifier(s390_nohz_notify, 0); } diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index a7251580891..f66a1bdbb61 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -4,8 +4,8 @@ source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION - bool "Virtualization" - default y + def_bool y + prompt "Virtualization" ---help--- Say Y here to get to see options for using your Linux host to run other operating systems inside virtual machines (guests). @@ -16,7 +16,8 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - tristate "Kernel-based Virtual Machine (KVM) support" + def_tristate y + prompt "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 7c37ec359ec..0f53110e1d0 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -47,7 +47,6 @@ static void __udelay_disabled(unsigned long long usecs) lockdep_on(); __ctl_load(cr0_saved, 0, 0); local_tick_enable(clock_saved); - set_clock_comparator(S390_lowcore.clock_comparator); } static void __udelay_enabled(unsigned long long usecs) @@ -70,7 +69,6 @@ static void __udelay_enabled(unsigned long long usecs) if (clock_saved) local_tick_enable(clock_saved); } while (get_clock() < end); - set_clock_comparator(S390_lowcore.clock_comparator); } /* diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index fe5701e9efb..2c57806c085 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -10,6 +10,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/kernel_stat.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> @@ -234,13 +235,13 @@ static noinline int signal_return(struct pt_regs *regs, long int_code, rc = __get_user(instruction, (u16 __user *) regs->psw.addr); if (!rc && instruction == 0x0a77) { - clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + clear_tsk_thread_flag(current, TIF_PER_TRAP); if (is_compat_task()) sys32_sigreturn(); else sys_sigreturn(); } else if (!rc && instruction == 0x0aad) { - clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + clear_tsk_thread_flag(current, TIF_PER_TRAP); if (is_compat_task()) sys32_rt_sigreturn(); else @@ -378,7 +379,7 @@ static inline int do_exception(struct pt_regs *regs, int access, * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ - clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP); + clear_tsk_thread_flag(tsk, TIF_PER_TRAP); fault = 0; out_up: up_read(&mm->mmap_sem); @@ -480,8 +481,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) /* * 'pfault' pseudo page faults routines. */ -static ext_int_info_t ext_int_pfault; -static int pfault_disable = 0; +static int pfault_disable; static int __init nopfault(char *str) { @@ -543,6 +543,7 @@ static void pfault_interrupt(unsigned int ext_int_code, struct task_struct *tsk; __u16 subcode; + kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; /* * Get the external interruption subcode & pfault * initial/completion signal bit. VM stores this @@ -592,24 +593,28 @@ static void pfault_interrupt(unsigned int ext_int_code, } } -void __init pfault_irq_init(void) +static int __init pfault_irq_init(void) { - if (!MACHINE_IS_VM) - return; + int rc; + if (!MACHINE_IS_VM) + return 0; /* * Try to get pfault pseudo page faults going. */ - if (register_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault) != 0) - panic("Couldn't request external interrupt 0x2603"); - + rc = register_external_interrupt(0x2603, pfault_interrupt); + if (rc) { + pfault_disable = 1; + return rc; + } if (pfault_init() == 0) - return; + return 0; /* Tough luck, no pfault. */ pfault_disable = 1; - unregister_early_external_interrupt(0x2603, pfault_interrupt, - &ext_int_pfault); + unregister_external_interrupt(0x2603, pfault_interrupt); + return 0; } +early_initcall(pfault_irq_init); + #endif |