diff options
Diffstat (limited to 'arch/s390/kernel')
37 files changed, 2175 insertions, 928 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 50f657e7734..3edc6c6f258 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,6 +2,11 @@ # Makefile for the linux kernel. # +ifdef CONFIG_FUNCTION_TRACER +# Do not trace early boot code +CFLAGS_REMOVE_early.o = -pg +endif + # # Passing null pointers is ok for smp code, since we access the lowcore here. # @@ -12,9 +17,10 @@ CFLAGS_smp.o := -Wno-nonnull # CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -obj-y := bitmap.o traps.o time.o process.o base.o early.o \ - setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ - s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o +obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o \ + processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ + s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \ + vdso.o vtime.o obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) @@ -30,12 +36,16 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ compat_wrapper.o compat_exec_domain.o \ $(compat-obj-y) -obj-$(CONFIG_VIRT_TIMER) += vtime.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) +# vdso +obj-$(CONFIG_64BIT) += vdso64/ +obj-$(CONFIG_32BIT) += vdso32/ +obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 3d144e6020c..67a60016bab 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/kbuild.h> +#include <asm/vdso.h> int main(void) { @@ -38,5 +39,24 @@ int main(void) DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); + BLANK(); + /* timeval/timezone offsets for use by vdso */ + DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count)); + DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); + DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); + DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); + DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_ECTG_BASE, + offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, + offsetof(struct vdso_per_cpu_data, ectg_user_time)); + /* constants used by the vdso */ + DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); + DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + return 0; } diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 4646382af34..6cc87d8c868 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -148,9 +148,9 @@ asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -165,9 +165,9 @@ asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -217,20 +217,20 @@ asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -261,22 +261,22 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) asmlinkage long sys32_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys32_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys32_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys32_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } /* diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index d8c1131e081..3e8b8816f30 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -7,6 +7,9 @@ * Christian Borntraeger (cborntra@de.ibm.com), */ +#define KMSG_COMPONENT "cpcmd" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -104,8 +107,8 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) (((unsigned long)response + rlen) >> 31)) { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { - printk(KERN_WARNING - "cpcmd: could not allocate response buffer\n"); + pr_warning("The cpcmd kernel function failed to " + "allocate a response buffer\n"); return -ENOMEM; } spin_lock_irqsave(&cpcmd_lock, flags); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index d80fcd4a7fe..ba03fc0a3a5 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -10,6 +10,9 @@ * Bugreports to: <Linux390@de.ibm.com> */ +#define KMSG_COMPONENT "s390dbf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -388,7 +391,7 @@ debug_info_copy(debug_info_t* in, int mode) debug_info_free(rc); } while (1); - if(!rc || (mode == NO_AREAS)) + if (mode == NO_AREAS) goto out; for(i = 0; i < in->nr_areas; i++){ @@ -693,8 +696,8 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, /* Since debugfs currently does not support uid/gid other than root, */ /* we do not allow gid/uid != 0 until we get support for that. */ if ((uid != 0) || (gid != 0)) - printk(KERN_WARNING "debug: Warning - Currently only uid/gid " - "= 0 are supported. Using root as owner now!"); + pr_warning("Root becomes the owner of all s390dbf files " + "in sysfs\n"); if (!initialized) BUG(); mutex_lock(&debug_mutex); @@ -709,7 +712,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, debug_register_view(rc, &debug_pages_view); out: if (!rc){ - printk(KERN_ERR "debug: debug_register failed for %s\n",name); + pr_err("Registering debug feature %s failed\n", name); } mutex_unlock(&debug_mutex); return rc; @@ -763,8 +766,8 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) if(pages_per_area > 0){ new_areas = debug_areas_alloc(pages_per_area, nr_areas); if(!new_areas) { - printk(KERN_WARNING "debug: could not allocate memory "\ - "for pagenumber: %i\n",pages_per_area); + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); rc = -ENOMEM; goto out; } @@ -780,8 +783,7 @@ debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) memset(id->active_entries,0,sizeof(int)*id->nr_areas); memset(id->active_pages, 0, sizeof(int)*id->nr_areas); spin_unlock_irqrestore(&id->lock,flags); - printk(KERN_INFO "debug: %s: set new size (%i pages)\n"\ - ,id->name, pages_per_area); + pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area); out: return rc; } @@ -800,10 +802,9 @@ debug_set_level(debug_info_t* id, int new_level) spin_lock_irqsave(&id->lock,flags); if(new_level == DEBUG_OFF_LEVEL){ id->level = DEBUG_OFF_LEVEL; - printk(KERN_INFO "debug: %s: switched off\n",id->name); + pr_info("%s: switched off\n",id->name); } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { - printk(KERN_INFO - "debug: %s: level %i is out of range (%i - %i)\n", + pr_info("%s: level %i is out of range (%i - %i)\n", id->name, new_level, 0, DEBUG_MAX_LEVEL); } else { id->level = new_level; @@ -1108,8 +1109,8 @@ debug_register_view(debug_info_t * id, struct debug_view *view) pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, id , &debug_file_ops); if (!pde){ - printk(KERN_WARNING "debug: debugfs_create_file() failed!"\ - " Cannot register view %s/%s\n", id->name,view->name); + pr_err("Registering view %s/%s failed due to out of " + "memory\n", id->name,view->name); rc = -1; goto out; } @@ -1119,10 +1120,8 @@ debug_register_view(debug_info_t * id, struct debug_view *view) break; } if (i == DEBUG_MAX_VIEWS) { - printk(KERN_WARNING "debug: cannot register view %s/%s\n", - id->name,view->name); - printk(KERN_WARNING - "debug: maximum number of views reached (%i)!\n", i); + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); debugfs_remove(pde); rc = -1; } else { @@ -1303,7 +1302,8 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view, new_level = debug_get_uint(str); } if(new_level < 0) { - printk(KERN_INFO "debug: level `%s` is not valid\n", str); + pr_warning("%s is not a valid level for a debug " + "feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); @@ -1380,7 +1380,8 @@ debug_input_flush_fn(debug_info_t * id, struct debug_view *view, goto out; } - printk(KERN_INFO "debug: area `%c` is not valid\n", input_buf[0]); + pr_info("Flushing debug data failed because %c is not a valid " + "area\n", input_buf[0]); out: *offset += user_len; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 198ea18a534..1268aa2991b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -109,13 +109,6 @@ STACK_SIZE = 1 << STACK_SHIFT * R15 - kernel stack pointer */ - .macro STORE_TIMER lc_offset -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - stpt \lc_offset -#endif - .endm - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .macro UPDATE_VTIME lc_from,lc_to,lc_sum lm %r10,%r11,\lc_from sl %r10,\lc_to @@ -128,7 +121,6 @@ STACK_SIZE = 1 << STACK_SHIFT al %r10,BASED(.Lc_1) 1: stm %r10,%r11,\lc_sum .endm -#endif .macro SAVE_ALL_BASE savearea stm %r12,%r15,\savearea @@ -198,7 +190,7 @@ STACK_SIZE = 1 << STACK_SHIFT ni \psworg+1,0xfd # clear wait state bit .endif lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - STORE_TIMER __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER lpsw \psworg # back to caller .endm @@ -247,20 +239,18 @@ __critical_start: .globl system_call system_call: - STORE_TIMER __LC_SYNC_ENTER_TIMER + stpt __LC_SYNC_ENTER_TIMER sysc_saveall: SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA lh %r7,0x8a # get svc number from lowcore -#ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif sysc_do_svc: l %r9,__LC_THREAD_INFO # load pointer to thread_info struct ltr %r7,%r7 # test for svc 0 @@ -436,7 +426,7 @@ ret_from_fork: basr %r14,%r1 TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts - b BASED(sysc_return) + b BASED(sysc_tracenogo) # # kernel_execve function needs to deal with pt_regs that is not @@ -490,20 +480,18 @@ pgm_check_handler: * we just ignore the PER event (FIXME: is there anything we have to do * for LPSW?). */ - STORE_TIMER __LC_SYNC_ENTER_TIMER + stpt __LC_SYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: -#endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF l %r3,__LC_PGM_ILC # load program interruption code @@ -536,14 +524,12 @@ pgm_per: pgm_per_std: SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime2) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: -#endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF l %r1,__TI_task(%r9) @@ -565,11 +551,9 @@ pgm_no_vtime2: pgm_svcper: SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif lh %r7,0x8a # get svc number from lowcore l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF @@ -599,19 +583,17 @@ kernel_per: .globl io_int_handler io_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(io_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER io_no_vtime: -#endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ @@ -741,19 +723,17 @@ io_notify_resume: .globl ext_int_handler ext_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(ext_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER ext_no_vtime: -#endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area @@ -770,13 +750,13 @@ __critical_end: .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+32 la %r12,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? bo BASED(mcck_int_main) # yes -> rest of mcck code invalid -#ifdef CONFIG_VIRT_CPU_ACCOUNTING mvc __LC_SAVE_AREA+52(8),__LC_ASYNC_ENTER_TIMER mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? @@ -793,9 +773,7 @@ mcck_int_handler: la %r14,__LC_LAST_UPDATE_TIMER 0: spt 0(%r14) mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14) -1: -#endif - tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? +1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? bno BASED(mcck_int_main) # no -> skip cleanup critical tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit bnz BASED(mcck_int_main) # from user -> load async stack @@ -812,7 +790,6 @@ mcck_int_main: be BASED(0f) l %r15,__LC_PANIC_STACK # load panic stack 0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? bno BASED(mcck_no_vtime) # no -> skip cleanup critical tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -821,7 +798,6 @@ mcck_int_main: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER mcck_no_vtime: -#endif l %r9,__LC_THREAD_INFO # load pointer to thread_info struct la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ls390_mcck) @@ -843,16 +819,13 @@ mcck_no_vtime: mcck_return: mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit -#ifdef CONFIG_VIRT_CPU_ACCOUNTING mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+52 tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? bno BASED(0f) lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 stpt __LC_EXIT_TIMER lpsw __LC_RETURN_MCCK_PSW # back to caller -0: -#endif - lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 +0: lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 lpsw __LC_RETURN_MCCK_PSW # back to caller RESTORE_ALL __LC_RETURN_MCCK_PSW,0 @@ -976,13 +949,11 @@ cleanup_system_call: b BASED(1f) 0: la %r12,__LC_SAVE_AREA+32 1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4) bh BASED(0f) mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER 0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8) bhe BASED(cleanup_vtime) -#endif clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn) bh BASED(0f) mvc __LC_SAVE_AREA(16),0(%r12) @@ -993,7 +964,6 @@ cleanup_system_call: l %r12,__LC_SAVE_AREA+48 # argh st %r15,12(%r12) lh %r7,0x8a -#ifdef CONFIG_VIRT_CPU_ACCOUNTING cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) bhe BASED(cleanup_stime) @@ -1004,18 +974,15 @@ cleanup_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER cleanup_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_system_call+4) la %r12,__LC_RETURN_PSW br %r14 cleanup_system_call_insn: .long sysc_saveall + 0x80000000 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .long system_call + 0x80000000 .long sysc_vtime + 0x80000000 .long sysc_stime + 0x80000000 .long sysc_update + 0x80000000 -#endif cleanup_sysc_return: mvc __LC_RETURN_PSW(4),0(%r12) @@ -1026,11 +993,9 @@ cleanup_sysc_return: cleanup_sysc_leave: clc 4(4,%r12),BASED(cleanup_sysc_leave_insn) be BASED(2f) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER clc 4(4,%r12),BASED(cleanup_sysc_leave_insn+4) be BASED(2f) -#endif mvc __LC_RETURN_PSW(8),SP_PSW(%r15) c %r12,BASED(.Lmck_old_psw) bne BASED(0f) @@ -1043,9 +1008,7 @@ cleanup_sysc_leave: br %r14 cleanup_sysc_leave_insn: .long sysc_done - 4 + 0x80000000 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .long sysc_done - 8 + 0x80000000 -#endif cleanup_io_return: mvc __LC_RETURN_PSW(4),0(%r12) @@ -1056,11 +1019,9 @@ cleanup_io_return: cleanup_io_leave: clc 4(4,%r12),BASED(cleanup_io_leave_insn) be BASED(2f) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER clc 4(4,%r12),BASED(cleanup_io_leave_insn+4) be BASED(2f) -#endif mvc __LC_RETURN_PSW(8),SP_PSW(%r15) c %r12,BASED(.Lmck_old_psw) bne BASED(0f) @@ -1073,9 +1034,7 @@ cleanup_io_leave: br %r14 cleanup_io_leave_insn: .long io_done - 4 + 0x80000000 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .long io_done - 8 + 0x80000000 -#endif /* * Integer constants diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 89c121ae633..c6fbde13971 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -96,20 +96,12 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ #define LOCKDEP_SYS_EXIT #endif - .macro STORE_TIMER lc_offset -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - stpt \lc_offset -#endif - .endm - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .macro UPDATE_VTIME lc_from,lc_to,lc_sum lg %r10,\lc_from slg %r10,\lc_to alg %r10,\lc_sum stg %r10,\lc_sum .endm -#endif /* * Register usage in interrupt handlers: @@ -185,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ .if !\sync ni \psworg+1,0xfd # clear wait state bit .endif - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user - STORE_TIMER __LC_EXIT_TIMER + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user + stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user lpswe \psworg # back to caller .endm @@ -233,20 +228,18 @@ __critical_start: .globl system_call system_call: - STORE_TIMER __LC_SYNC_ENTER_TIMER + stpt __LC_SYNC_ENTER_TIMER sysc_saveall: SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore -#ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif sysc_do_svc: lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct ltgr %r7,%r7 # test for svc 0 @@ -417,7 +410,7 @@ ret_from_fork: 0: brasl %r14,schedule_tail TRACE_IRQS_ON stosm 24(%r15),0x03 # reenable interrupts - j sysc_return + j sysc_tracenogo # # kernel_execve function needs to deal with pt_regs that is not @@ -469,20 +462,18 @@ pgm_check_handler: * we just ignore the PER event (FIXME: is there anything we have to do * for LPSW?). */ - STORE_TIMER __LC_SYNC_ENTER_TIMER + stpt __LC_SYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception jnz pgm_per # got per exception -> special case SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: -#endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_ARGS(8,%r15),__LC_LAST_BREAK TRACE_IRQS_OFF @@ -516,14 +507,12 @@ pgm_per: pgm_per_std: SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime2 UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: -#endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF lg %r1,__TI_task(%r9) @@ -545,11 +534,9 @@ pgm_no_vtime2: pgm_svcper: SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA -#ifdef CONFIG_VIRT_CPU_ACCOUNTING UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct lg %r1,__TI_task(%r9) @@ -575,19 +562,17 @@ kernel_per: */ .globl io_int_handler io_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz io_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER io_no_vtime: -#endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area @@ -739,19 +724,17 @@ io_notify_resume: */ .globl ext_int_handler ext_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz ext_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER ext_no_vtime: -#endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area @@ -766,6 +749,7 @@ __critical_end: */ .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs @@ -773,7 +757,6 @@ mcck_int_handler: la %r12,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? jo mcck_int_main # yes -> rest of mcck code invalid -#ifdef CONFIG_VIRT_CPU_ACCOUNTING la %r14,4095 mvc __LC_SAVE_AREA+104(8),__LC_ASYNC_ENTER_TIMER mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14) @@ -791,9 +774,7 @@ mcck_int_handler: la %r14,__LC_LAST_UPDATE_TIMER 0: spt 0(%r14) mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14) -1: -#endif - tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? +1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? jno mcck_int_main # no -> skip cleanup critical tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit jnz mcck_int_main # from user -> load kernel stack @@ -809,7 +790,6 @@ mcck_int_main: jz 0f lg %r15,__LC_PANIC_STACK # load panic stack 0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? jno mcck_no_vtime # no -> no timer update tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -818,7 +798,6 @@ mcck_int_main: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER mcck_no_vtime: -#endif lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,s390_do_machine_check @@ -839,14 +818,11 @@ mcck_return: mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+104 tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER -0: -#endif - lpswe __LC_RETURN_MCCK_PSW # back to caller +0: lpswe __LC_RETURN_MCCK_PSW # back to caller /* * Restart interruption handler, kick starter for additional CPUs @@ -964,13 +940,11 @@ cleanup_system_call: j 1f 0: la %r12,__LC_SAVE_AREA+64 1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8) jh 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER 0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) jhe cleanup_vtime -#endif clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) jh 0f mvc __LC_SAVE_AREA(32),0(%r12) @@ -981,7 +955,6 @@ cleanup_system_call: lg %r12,__LC_SAVE_AREA+96 # argh stg %r15,24(%r12) llgh %r7,__LC_SVC_INT_CODE -#ifdef CONFIG_VIRT_CPU_ACCOUNTING cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) jhe cleanup_stime @@ -992,18 +965,15 @@ cleanup_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER cleanup_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER -#endif mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) la %r12,__LC_RETURN_PSW br %r14 cleanup_system_call_insn: .quad sysc_saveall -#ifdef CONFIG_VIRT_CPU_ACCOUNTING .quad system_call .quad sysc_vtime .quad sysc_stime .quad sysc_update -#endif cleanup_sysc_return: mvc __LC_RETURN_PSW(8),0(%r12) @@ -1013,27 +983,23 @@ cleanup_sysc_return: cleanup_sysc_leave: clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) - je 2f -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) - je 2f -#endif - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_leave_insn: .quad sysc_done - 4 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad sysc_done - 8 -#endif + .quad sysc_done - 16 cleanup_io_return: mvc __LC_RETURN_PSW(8),0(%r12) @@ -1043,27 +1009,23 @@ cleanup_io_return: cleanup_io_leave: clc 8(8,%r12),BASED(cleanup_io_leave_insn) - je 2f -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) - je 2f -#endif - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_leave_insn: .quad io_done - 4 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - .quad io_done - 8 -#endif + .quad io_done - 16 /* * Integer constants diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 83477c7dc74..ec7e35f6055 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -461,6 +461,55 @@ start: .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7 .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff +# +# startup-code at 0x10000, running in absolute addressing mode +# this is called either by the ipl loader or directly by PSW restart +# or linload or SALIPL +# + .org 0x10000 +startup:basr %r13,0 # get base +.LPG0: + +#ifndef CONFIG_MARCH_G5 + # check processor version against MARCH_{G5,Z900,Z990,Z9_109,Z10} + stidp __LC_CPUID # store cpuid + lhi %r0,(3f-2f) / 2 + la %r1,2f-.LPG0(%r13) +0: clc __LC_CPUID+4(2),0(%r1) + jne 3f + lpsw 1f-.LPG0(13) # machine type not good enough, crash + .align 16 +1: .long 0x000a0000,0x00000000 +2: +#if defined(CONFIG_MARCH_Z10) + .short 0x9672, 0x2064, 0x2066, 0x2084, 0x2086, 0x2094, 0x2096 +#elif defined(CONFIG_MARCH_Z9_109) + .short 0x9672, 0x2064, 0x2066, 0x2084, 0x2086 +#elif defined(CONFIG_MARCH_Z990) + .short 0x9672, 0x2064, 0x2066 +#elif defined(CONFIG_MARCH_Z900) + .short 0x9672 +#endif +3: la %r1,2(%r1) + brct %r0,0b +#endif + + l %r13,0f-.LPG0(%r13) + b 0(%r13) +0: .long startup_continue + +# +# params at 10400 (setup.h) +# + .org PARMAREA + .long 0,0 # IPL_DEVICE + .long 0,0 # INITRD_START + .long 0,0 # INITRD_SIZE + + .org COMMAND_LINE + .byte "root=/dev/ram0 ro" + .byte 0 + #ifdef CONFIG_64BIT #include "head64.S" #else diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index a816e2de32b..db476d114ca 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -10,34 +10,13 @@ * */ -# -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL -# - .org 0x10000 -startup:basr %r13,0 # get base -.LPG0: l %r13,0f-.LPG0(%r13) - b 0(%r13) -0: .long startup_continue - -# -# params at 10400 (setup.h) -# - .org PARMAREA - .long 0,0 # IPL_DEVICE - .long 0,0 # INITRD_START - .long 0,0 # INITRD_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 - .org 0x11000 startup_continue: basr %r13,0 # get base -.LPG1: mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) +.LPG1: + + mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0) lctl %c0,%c15,.Lctl-.LPG1(%r13) # load control registers l %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore @@ -50,7 +29,6 @@ startup_continue: ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE st %r15,__LC_KERNEL_STACK # set end of kernel stack ahi %r15,-96 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain # # Save ipl parameters, clear bss memory, initialize storage key for kernel pages, # and create a kernel NSS if the SAVESYS= parm is defined diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 1d06961e87b..f9f70aa1524 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -10,29 +10,6 @@ * */ -# -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL -# - .org 0x10000 -startup:basr %r13,0 # get base -.LPG0: l %r13,0f-.LPG0(%r13) - b 0(%r13) -0: .long startup_continue - -# -# params at 10400 (setup.h) -# - .org PARMAREA - .quad 0 # IPL_DEVICE - .quad 0 # INITRD_START - .quad 0 # INITRD_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 - .org 0x11000 startup_continue: @@ -110,6 +87,8 @@ startup_continue: lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU # # Setup stack # @@ -119,7 +98,6 @@ startup_continue: aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE stg %r15,__LC_KERNEL_STACK # set end of kernel stack aghi %r15,-160 - xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # clear backchain # # Save ipl parameters, clear bss memory, initialize storage key for kernel pages, # and create a kernel NSS if the SAVESYS= parm is defined diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index e8071684361..7db95c0b869 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -16,7 +16,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S new file mode 100644 index 00000000000..397d131a345 --- /dev/null +++ b/arch/s390/kernel/mcount.S @@ -0,0 +1,56 @@ +/* + * Copyright IBM Corp. 2008 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#ifndef CONFIG_64BIT +.globl _mcount +_mcount: + stm %r0,%r5,8(%r15) + st %r14,56(%r15) + lr %r1,%r15 + ahi %r15,-96 + l %r3,100(%r15) + la %r2,0(%r14) + st %r1,0(%r15) + la %r3,0(%r3) + bras %r14,0f + .long ftrace_trace_function +0: l %r14,0(%r14) + l %r14,0(%r14) + basr %r14,%r14 + ahi %r15,96 + lm %r0,%r5,8(%r15) + l %r14,56(%r15) + br %r14 + +.globl ftrace_stub +ftrace_stub: + br %r14 + +#else /* CONFIG_64BIT */ + +.globl _mcount +_mcount: + stmg %r0,%r5,16(%r15) + stg %r14,112(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,0(%r15) + lgr %r2,%r14 + lg %r3,168(%r15) + larl %r14,ftrace_trace_function + lg %r14,0(%r14) + basr %r14,%r14 + aghi %r15,160 + lmg %r0,%r5,16(%r15) + lg %r14,112(%r15) + br %r14 + +.globl ftrace_stub +ftrace_stub: + br %r14 + +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 04f8c67a610..b6110bdf8dc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -38,6 +38,7 @@ #include <linux/utsname.h> #include <linux/tick.h> #include <linux/elfcore.h> +#include <linux/kernel_stat.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -45,7 +46,6 @@ #include <asm/processor.h> #include <asm/irq.h> #include <asm/timer.h> -#include <asm/cpu.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { - .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) -}; - -static int s390_idle_enter(void) -{ - struct s390_idle_data *idle; - - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_count++; - idle->in_idle = 1; - idle->idle_enter = get_clock(); - spin_unlock(&idle->lock); - vtime_stop_cpu_timer(); - return NOTIFY_OK; -} - -void s390_idle_leave(void) -{ - struct s390_idle_data *idle; - - vtime_start_cpu_timer(); - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_time += get_clock() - idle->idle_enter; - idle->in_idle = 0; - spin_unlock(&idle->lock); -} - extern void s390_handle_mcck(void); /* * The idle loop on a S390... @@ -117,10 +87,6 @@ static void default_idle(void) local_irq_enable(); return; } - if (s390_idle_enter() == NOTIFY_BAD) { - local_irq_enable(); - return; - } #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) { preempt_enable_no_resched(); @@ -130,7 +96,6 @@ static void default_idle(void) local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); - s390_idle_leave(); local_irq_enable(); s390_handle_mcck(); return; @@ -138,9 +103,9 @@ static void default_idle(void) trace_hardirqs_on(); /* Don't trace preempt off for idle. */ stop_critical_timings(); - /* Wait for external, I/O or machine check interrupt. */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT); + /* Stop virtual timer and halt the cpu. */ + vtime_stop_cpu(); + /* Reenable preemption tracer. */ start_critical_timings(); } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c new file mode 100644 index 00000000000..82c1872cfe8 --- /dev/null +++ b/arch/s390/kernel/processor.c @@ -0,0 +1,98 @@ +/* + * arch/s390/kernel/processor.c + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/delay.h> + +#include <asm/elf.h> +#include <asm/lowcore.h> +#include <asm/param.h> + +void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo) +{ + pr_info("Processor %d started, address %d, identification %06X\n", + cpuinfo->cpu_nr, cpuinfo->cpu_addr, cpuinfo->cpu_id.ident); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ + +static int show_cpuinfo(struct seq_file *m, void *v) +{ + static const char *hwcap_str[8] = { + "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", + "edat" + }; + struct cpuinfo_S390 *cpuinfo; + unsigned long n = (unsigned long) v - 1; + int i; + + s390_adjust_jiffies(); + preempt_disable(); + if (!n) { + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_puts(m, "features\t: "); + for (i = 0; i < 8; i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + seq_puts(m, "\n"); + } + + if (cpu_online(n)) { +#ifdef CONFIG_SMP + if (smp_processor_id() == n) + cpuinfo = &S390_lowcore.cpu_data; + else + cpuinfo = &lowcore_ptr[n]->cpu_data; +#else + cpuinfo = &S390_lowcore.cpu_data; +#endif + seq_printf(m, "processor %li: " + "version = %02X, " + "identification = %06X, " + "machine = %04X\n", + n, cpuinfo->cpu_id.version, + cpuinfo->cpu_id.ident, + cpuinfo->cpu_id.machine); + } + preempt_enable(); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 38ff2bce120..75c496f4f16 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -204,7 +204,6 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) static int peek_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; addr_t tmp, mask; /* @@ -213,8 +212,8 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) */ mask = __ADDR_MASK; #ifdef CONFIG_64BIT - if (addr >= (addr_t) &dummy->regs.acrs && - addr < (addr_t) &dummy->regs.orig_gpr2) + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) mask = 3; #endif if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) @@ -312,7 +311,6 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) static int poke_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; addr_t mask; /* @@ -321,8 +319,8 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) */ mask = __ADDR_MASK; #ifdef CONFIG_64BIT - if (addr >= (addr_t) &dummy->regs.acrs && - addr < (addr_t) &dummy->regs.orig_gpr2) + if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && + addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) mask = 3; #endif if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index e019b419efc..a0d2d55d7fb 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); s390_idle_check(); + irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 48238a114ce..46b90cb0370 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -14,6 +14,7 @@ #include <asm/delay.h> #include <asm/pgalloc.h> #include <asm/setup.h> +#include <asm/ftrace.h> #ifdef CONFIG_IP_MULTICAST #include <net/arp.h> #endif @@ -43,3 +44,7 @@ EXPORT_SYMBOL(csum_fold); EXPORT_SYMBOL(console_mode); EXPORT_SYMBOL(console_devno); EXPORT_SYMBOL(console_irq); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 400b040df7f..d825f4950e4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -14,6 +14,9 @@ * This file handles the architecture-dependent parts of initialization */ +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/errno.h> #include <linux/module.h> #include <linux/sched.h> @@ -32,7 +35,6 @@ #include <linux/bootmem.h> #include <linux/root_dev.h> #include <linux/console.h> -#include <linux/seq_file.h> #include <linux/kernel_stat.h> #include <linux/device.h> #include <linux/notifier.h> @@ -291,8 +293,8 @@ unsigned int switch_amode = 0; #endif EXPORT_SYMBOL_GPL(switch_amode); -static void set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int set_amode_and_uaccess(unsigned long user_amode, + unsigned long user32_amode) { psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | @@ -309,11 +311,11 @@ static void set_amode_and_uaccess(unsigned long user_amode, PSW_MASK_MCHECK | PSW_DEFAULT_KEY; if (MACHINE_HAS_MVCOS) { - printk("mvcos available.\n"); memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); + return 1; } else { - printk("mvcos not available.\n"); memcpy(&uaccess, &uaccess_pt, sizeof(uaccess)); + return 0; } } @@ -328,9 +330,10 @@ static int __init early_parse_switch_amode(char *p) early_param("switch_amode", early_parse_switch_amode); #else /* CONFIG_S390_SWITCH_AMODE */ -static inline void set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static inline int set_amode_and_uaccess(unsigned long user_amode, + unsigned long user32_amode) { + return 0; } #endif /* CONFIG_S390_SWITCH_AMODE */ @@ -355,11 +358,20 @@ early_param("noexec", early_parse_noexec); static void setup_addressing_mode(void) { if (s390_noexec) { - printk("S390 execute protection active, "); - set_amode_and_uaccess(PSW_ASC_SECONDARY, PSW32_ASC_SECONDARY); + if (set_amode_and_uaccess(PSW_ASC_SECONDARY, + PSW32_ASC_SECONDARY)) + pr_info("Execute protection active, " + "mvcos available\n"); + else + pr_info("Execute protection active, " + "mvcos not available\n"); } else if (switch_amode) { - printk("S390 address spaces switched, "); - set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY); + if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) + pr_info("Address spaces switched, " + "mvcos available\n"); + else + pr_info("Address spaces switched, " + "mvcos not available\n"); } #ifdef CONFIG_TRACE_IRQFLAGS sysc_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; @@ -415,6 +427,8 @@ setup_lowcore(void) /* enable extended save area */ __ctl_set_bit(14, 29); } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif set_prefix((u32)(unsigned long) lc); } @@ -572,15 +586,15 @@ setup_memory(void) start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; if (start + INITRD_SIZE > memory_end) { - printk("initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\n" + pr_err("initrd extends beyond end of " + "memory (0x%08lx > 0x%08lx) " "disabling initrd\n", start + INITRD_SIZE, memory_end); INITRD_START = INITRD_SIZE = 0; } else { - printk("Moving initrd (0x%08lx -> 0x%08lx, " - "size: %ld)\n", - INITRD_START, start, INITRD_SIZE); + pr_info("Moving initrd (0x%08lx -> " + "0x%08lx, size: %ld)\n", + INITRD_START, start, INITRD_SIZE); memmove((void *) start, (void *) INITRD_START, INITRD_SIZE); INITRD_START = start; @@ -642,8 +656,9 @@ setup_memory(void) initrd_start = INITRD_START; initrd_end = initrd_start + INITRD_SIZE; } else { - printk("initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + pr_err("initrd extends beyond end of " + "memory (0x%08lx > 0x%08lx) " + "disabling initrd\n", initrd_start + INITRD_SIZE, memory_end); initrd_start = initrd_end = 0; } @@ -651,23 +666,6 @@ setup_memory(void) #endif } -static int __init __stfle(unsigned long long *list, int doublewords) -{ - typedef struct { unsigned long long _[doublewords]; } addrtype; - register unsigned long __nr asm("0") = doublewords - 1; - - asm volatile(".insn s,0xb2b00000,%0" /* stfle */ - : "=m" (*(addrtype *) list), "+d" (__nr) : : "cc"); - return __nr + 1; -} - -int __init stfle(unsigned long long *list, int doublewords) -{ - if (!(stfl() & (1UL << 24))) - return -EOPNOTSUPP; - return __stfle(list, doublewords); -} - /* * Setup hardware capabilities. */ @@ -739,8 +737,13 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z990"); break; case 0x2094: + case 0x2096: strcpy(elf_platform, "z9-109"); break; + case 0x2097: + case 0x2098: + strcpy(elf_platform, "z10"); + break; } } @@ -752,25 +755,34 @@ static void __init setup_hwcaps(void) void __init setup_arch(char **cmdline_p) { + /* set up preferred console */ + add_preferred_console("ttyS", 0, NULL); + /* * print what head.S has found out about the machine */ #ifndef CONFIG_64BIT - printk((MACHINE_IS_VM) ? - "We are running under VM (31 bit mode)\n" : - "We are running native (31 bit mode)\n"); - printk((MACHINE_HAS_IEEE) ? - "This machine has an IEEE fpu\n" : - "This machine has no IEEE fpu\n"); + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 31-bit mode\n"); + else + pr_info("Linux is running natively in 31-bit mode\n"); + if (MACHINE_HAS_IEEE) + pr_info("The hardware system has IEEE compatible " + "floating point units\n"); + else + pr_info("The hardware system has no IEEE compatible " + "floating point units\n"); #else /* CONFIG_64BIT */ if (MACHINE_IS_VM) - printk("We are running under VM (64 bit mode)\n"); + pr_info("Linux is running as a z/VM " + "guest operating system in 64-bit mode\n"); else if (MACHINE_IS_KVM) { - printk("We are running under KVM (64 bit mode)\n"); + pr_info("Linux is running under KVM in 64-bit mode\n"); add_preferred_console("hvc", 0, NULL); s390_virtio_console_init(); } else - printk("We are running native (64 bit mode)\n"); + pr_info("Linux is running natively in 64-bit mode\n"); #endif /* CONFIG_64BIT */ /* Have one command line that is parsed and saved in /proc/cmdline */ @@ -818,90 +830,3 @@ setup_arch(char **cmdline_p) /* Setup zfcpdump support */ setup_zfcpdump(console_devno); } - -void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo) -{ - printk(KERN_INFO "cpu %d " -#ifdef CONFIG_SMP - "phys_idx=%d " -#endif - "vers=%02X ident=%06X machine=%04X unused=%04X\n", - cpuinfo->cpu_nr, -#ifdef CONFIG_SMP - cpuinfo->cpu_addr, -#endif - cpuinfo->cpu_id.version, - cpuinfo->cpu_id.ident, - cpuinfo->cpu_id.machine, - cpuinfo->cpu_id.unused); -} - -/* - * show_cpuinfo - Get information on one CPU for use by procfs. - */ - -static int show_cpuinfo(struct seq_file *m, void *v) -{ - static const char *hwcap_str[8] = { - "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat" - }; - struct cpuinfo_S390 *cpuinfo; - unsigned long n = (unsigned long) v - 1; - int i; - - s390_adjust_jiffies(); - preempt_disable(); - if (!n) { - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < 8; i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - seq_puts(m, "\n"); - } - - if (cpu_online(n)) { -#ifdef CONFIG_SMP - if (smp_processor_id() == n) - cpuinfo = &S390_lowcore.cpu_data; - else - cpuinfo = &lowcore_ptr[n]->cpu_data; -#else - cpuinfo = &S390_lowcore.cpu_data; -#endif - seq_printf(m, "processor %li: " - "version = %02X, " - "identification = %06X, " - "machine = %04X\n", - n, cpuinfo->cpu_id.version, - cpuinfo->cpu_id.ident, - cpuinfo->cpu_id.machine); - } - preempt_enable(); - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; - diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b5595688a47..9c0ccb532a4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -20,6 +20,9 @@ * cpu_number_map in other architectures. */ +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/mm.h> @@ -44,6 +47,7 @@ #include <asm/lowcore.h> #include <asm/sclp.h> #include <asm/cpu.h> +#include <asm/vdso.h> #include "entry.h" /* @@ -52,12 +56,6 @@ struct _lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); - -cpumask_t cpu_possible_map = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_possible_map); - static struct task_struct *current_set[NR_CPUS]; static u8 smp_cpu_type; @@ -77,159 +75,6 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); static void smp_ext_bitcall(int, ec_bit_sig); -/* - * Structure and data for __smp_call_function_map(). This is designed to - * minimise static memory requirements. It also looks cleaner. - */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - cpumask_t started; - cpumask_t finished; - int wait; -}; - -static struct call_data_struct *call_data; - -/* - * 'Call function' interrupt callback - */ -static void do_call_function(void) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - cpu_set(smp_processor_id(), call_data->started); - (*func)(info); - if (wait) - cpu_set(smp_processor_id(), call_data->finished);; -} - -static void __smp_call_function_map(void (*func) (void *info), void *info, - int wait, cpumask_t map) -{ - struct call_data_struct data; - int cpu, local = 0; - - /* - * Can deadlock when interrupts are disabled or if in wrong context. - */ - WARN_ON(irqs_disabled() || in_irq()); - - /* - * Check for local function call. We have to have the same call order - * as in on_each_cpu() because of machine_restart_smp(). - */ - if (cpu_isset(smp_processor_id(), map)) { - local = 1; - cpu_clear(smp_processor_id(), map); - } - - cpus_and(map, map, cpu_online_map); - if (cpus_empty(map)) - goto out; - - data.func = func; - data.info = info; - data.started = CPU_MASK_NONE; - data.wait = wait; - if (wait) - data.finished = CPU_MASK_NONE; - - call_data = &data; - - for_each_cpu_mask(cpu, map) - smp_ext_bitcall(cpu, ec_call_function); - - /* Wait for response */ - while (!cpus_equal(map, data.started)) - cpu_relax(); - if (wait) - while (!cpus_equal(map, data.finished)) - cpu_relax(); -out: - if (local) { - local_irq_disable(); - func(info); - local_irq_enable(); - } -} - -/* - * smp_call_function: - * @func: the function to run; this must be fast and non-blocking - * @info: an arbitrary pointer to pass to the function - * @wait: if true, wait (atomically) until function has completed on other CPUs - * - * Run a function on all other CPUs. - * - * You must not call this function with disabled interrupts, from a - * hardware interrupt handler or from a bottom half. - */ -int smp_call_function(void (*func) (void *info), void *info, int wait) -{ - cpumask_t map; - - spin_lock(&call_lock); - map = cpu_online_map; - cpu_clear(smp_processor_id(), map); - __smp_call_function_map(func, info, wait, map); - spin_unlock(&call_lock); - return 0; -} -EXPORT_SYMBOL(smp_call_function); - -/* - * smp_call_function_single: - * @cpu: the CPU where func should run - * @func: the function to run; this must be fast and non-blocking - * @info: an arbitrary pointer to pass to the function - * @wait: if true, wait (atomically) until function has completed on other CPUs - * - * Run a function on one processor. - * - * You must not call this function with disabled interrupts, from a - * hardware interrupt handler or from a bottom half. - */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int wait) -{ - spin_lock(&call_lock); - __smp_call_function_map(func, info, wait, cpumask_of_cpu(cpu)); - spin_unlock(&call_lock); - return 0; -} -EXPORT_SYMBOL(smp_call_function_single); - -/** - * smp_call_function_mask(): Run a function on a set of other CPUs. - * @mask: The set of cpus to run on. Must not include the current cpu. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ -int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, - int wait) -{ - spin_lock(&call_lock); - cpu_clear(smp_processor_id(), mask); - __smp_call_function_map(func, info, wait, mask); - spin_unlock(&call_lock); - return 0; -} -EXPORT_SYMBOL(smp_call_function_mask); - void smp_send_stop(void) { int cpu, rc; @@ -271,7 +116,10 @@ static void do_ext_call_interrupt(__u16 code) bits = xchg(&S390_lowcore.ext_call_fast, 0); if (test_bit(ec_call_function, &bits)) - do_call_function(); + generic_smp_call_function_interrupt(); + + if (test_bit(ec_call_function_single, &bits)) + generic_smp_call_function_single_interrupt(); } /* @@ -288,6 +136,19 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig) udelay(10); } +void arch_send_call_function_ipi(cpumask_t mask) +{ + int cpu; + + for_each_cpu_mask(cpu, mask) + smp_ext_bitcall(cpu, ec_call_function); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + smp_ext_bitcall(cpu, ec_call_function_single); +} + #ifndef CONFIG_64BIT /* * this function sends a 'purge tlb' signal to another CPU. @@ -388,8 +249,8 @@ static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; if (cpu >= NR_CPUS) { - printk(KERN_WARNING "Registers for cpu %i not saved since dump " - "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS); + pr_warning("CPU %i exceeds the maximum %i and is excluded from " + "the dump\n", cpu, NR_CPUS - 1); return; } zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL); @@ -562,7 +423,7 @@ static void __init smp_detect_cpus(void) } out: kfree(info); - printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus); + pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); get_online_cpus(); __smp_rescan_cpus(); put_online_cpus(); @@ -578,19 +439,17 @@ int __cpuinit start_secondary(void *cpuvoid) preempt_disable(); /* Enable TOD clock interrupts on the secondary cpu. */ init_cpu_timer(); -#ifdef CONFIG_VIRT_TIMER /* Enable cpu timer interrupts on the secondary cpu. */ init_cpu_vtimer(); -#endif /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); /* call cpu notifiers */ notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ - spin_lock(&call_lock); + ipi_call_lock(); cpu_set(smp_processor_id(), cpu_online_map); - spin_unlock(&call_lock); + ipi_call_unlock(); /* Switch on interrupts */ local_irq_enable(); /* Print info about this processor */ @@ -639,18 +498,18 @@ static int __cpuinit smp_alloc_lowcore(int cpu) save_area = get_zeroed_page(GFP_KERNEL); if (!save_area) - goto out_save_area; + goto out; lowcore->extended_save_area_addr = (u32) save_area; } +#else + if (vdso_alloc_per_cpu(cpu, lowcore)) + goto out; #endif lowcore_ptr[cpu] = lowcore; return 0; -#ifndef CONFIG_64BIT -out_save_area: - free_page(panic_stack); -#endif out: + free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) lowcore, lc_order); return -ENOMEM; @@ -667,6 +526,8 @@ static void smp_free_lowcore(int cpu) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) free_page((unsigned long) lowcore->extended_save_area_addr); +#else + vdso_free_per_cpu(cpu, lowcore); #endif free_page(lowcore->panic_stack - PAGE_SIZE); free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); @@ -690,12 +551,8 @@ int __cpuinit __cpu_up(unsigned int cpu) ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]), cpu, sigp_set_prefix); - if (ccode) { - printk("sigp_set_prefix failed for cpu %d " - "with condition code %d\n", - (int) cpu, (int) ccode); + if (ccode) return -EIO; - } idle = current_set[cpu]; cpu_lowcore = lowcore_ptr[cpu]; @@ -778,7 +635,7 @@ void __cpu_die(unsigned int cpu) while (!smp_cpu_not_running(cpu)) cpu_relax(); smp_free_lowcore(cpu); - printk(KERN_INFO "Processor %d spun down\n", cpu); + pr_info("Processor %d stopped\n", cpu); } void cpu_die(void) @@ -813,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); panic_stack = __get_free_page(GFP_KERNEL); async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + BUG_ON(!lowcore || !panic_stack || !async_stack); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) save_area = get_zeroed_page(GFP_KERNEL); @@ -826,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) lowcore->extended_save_area_addr = (u32) save_area; +#else + BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore)); #endif set_prefix((u32)(unsigned long) lowcore); local_mcck_enable(); @@ -994,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev, unsigned long long idle_count; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); + spin_lock(&idle->lock); idle_count = idle->idle_count; - spin_unlock_irq(&idle->lock); + if (idle->idle_enter) + idle_count++; + spin_unlock(&idle->lock); return sprintf(buf, "%llu\n", idle_count); } static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -1005,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { struct s390_idle_data *idle; - unsigned long long new_time; + unsigned long long now, idle_time, idle_enter; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); - if (idle->in_idle) { - new_time = get_clock(); - idle->idle_time += new_time - idle->idle_enter; - idle->idle_enter = new_time; - } - new_time = idle->idle_time; - spin_unlock_irq(&idle->lock); - return sprintf(buf, "%llu\n", new_time >> 12); + spin_lock(&idle->lock); + now = get_clock(); + idle_time = idle->idle_time; + idle_enter = idle->idle_enter; + if (idle_enter != 0ULL && idle_enter < now) + idle_time += now - idle_enter; + spin_unlock(&idle->lock); + return sprintf(buf, "%llu\n", idle_time >> 12); } static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index eccefbbff88..d649600df5b 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -12,6 +12,9 @@ * Copyright (C) 1991, 1992, 1995 Linus Torvalds */ +#define KMSG_COMPONENT "time" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/errno.h> #include <linux/module.h> #include <linux/sched.h> @@ -20,6 +23,8 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/stop_machine.h> #include <linux/time.h> #include <linux/sysdev.h> #include <linux/delay.h> @@ -36,6 +41,7 @@ #include <asm/delay.h> #include <asm/s390_ext.h> #include <asm/div64.h> +#include <asm/vdso.h> #include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/timer.h> @@ -154,7 +160,7 @@ void init_cpu_timer(void) cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = s390_next_event; cd->set_mode = s390_set_mode; @@ -223,6 +229,36 @@ static struct clocksource clocksource_tod = { }; +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +{ + if (clock != &clocksource_tod) + return; + + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->xtime_tod_stamp = clock->cycle_last; + vdso_data->xtime_clock_sec = xtime.tv_sec; + vdso_data->xtime_clock_nsec = xtime.tv_nsec; + vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; + vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; + smp_wmb(); + ++vdso_data->tb_update_count; +} + +extern struct timezone sys_tz; + +void update_vsyscall_tz(void) +{ + /* Make userspace gettimeofday spin until we're done. */ + ++vdso_data->tb_update_count; + smp_wmb(); + vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; + vdso_data->tz_dsttime = sys_tz.tz_dsttime; + smp_wmb(); + ++vdso_data->tb_update_count; +} + /* * Initialize the TOD clock and the CPU timer of * the boot cpu. @@ -253,10 +289,8 @@ void __init time_init(void) /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); - -#ifdef CONFIG_VIRT_TIMER + /* Enable cpu timer interrupts on the boot cpu. */ vtime_init(); -#endif } /* @@ -288,8 +322,8 @@ static unsigned long long adjust_time(unsigned long long old, } sched_clock_base_cc += delta; if (adjust.offset != 0) { - printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n", - adjust.offset); + pr_notice("The ETR interface has adjusted the clock " + "by %li microseconds\n", adjust.offset); adjust.modes = ADJ_OFFSET_SINGLESHOT; do_adjtimex(&adjust); } @@ -360,6 +394,15 @@ static void enable_sync_clock(void) atomic_set_mask(0x80000000, sw_ptr); } +/* Single threaded workqueue used for etr and stp sync events */ +static struct workqueue_struct *time_sync_wq; + +static void __init time_init_wq(void) +{ + if (!time_sync_wq) + time_sync_wq = create_singlethread_workqueue("timesync"); +} + /* * External Time Reference (ETR) code. */ @@ -425,6 +468,7 @@ static struct timer_list etr_timer; static void etr_timeout(unsigned long dummy); static void etr_work_fn(struct work_struct *work); +static DEFINE_MUTEX(etr_work_mutex); static DECLARE_WORK(etr_work, etr_work_fn); /* @@ -440,8 +484,8 @@ static void etr_reset(void) etr_tolec = get_clock(); set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); } else if (etr_port0_online || etr_port1_online) { - printk(KERN_WARNING "Running on non ETR capable " - "machine, only local mode available.\n"); + pr_warning("The real or virtual hardware system does " + "not provide an ETR interface\n"); etr_port0_online = etr_port1_online = 0; } } @@ -452,17 +496,18 @@ static int __init etr_init(void) if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) return 0; + time_init_wq(); /* Check if this machine has the steai instruction. */ if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0) etr_steai_available = 1; setup_timer(&etr_timer, etr_timeout, 0UL); if (etr_port0_online) { set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } if (etr_port1_online) { set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } return 0; } @@ -489,7 +534,7 @@ void etr_switch_to_local(void) if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) disable_sync_clock(NULL); set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } /* @@ -505,7 +550,7 @@ void etr_sync_check(void) if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) disable_sync_clock(NULL); set_bit(ETR_EVENT_SYNC_CHECK, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } /* @@ -529,13 +574,13 @@ static void etr_timing_alert(struct etr_irq_parm *intparm) * Both ports are not up-to-date now. */ set_bit(ETR_EVENT_PORT_ALERT, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } static void etr_timeout(unsigned long dummy) { set_bit(ETR_EVENT_UPDATE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } /* @@ -642,14 +687,16 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p) } struct clock_sync_data { + atomic_t cpus; int in_sync; unsigned long long fixup_cc; + int etr_port; + struct etr_aib *etr_aib; }; -static void clock_sync_cpu_start(void *dummy) +static void clock_sync_cpu(struct clock_sync_data *sync) { - struct clock_sync_data *sync = dummy; - + atomic_dec(&sync->cpus); enable_sync_clock(); /* * This looks like a busy wait loop but it isn't. etr_sync_cpus @@ -675,39 +722,35 @@ static void clock_sync_cpu_start(void *dummy) fixup_clock_comparator(sync->fixup_cc); } -static void clock_sync_cpu_end(void *dummy) -{ -} - /* * Sync the TOD clock using the port refered to by aibp. This port * has to be enabled and the other port has to be disabled. The * last eacr update has to be more than 1.6 seconds in the past. */ -static int etr_sync_clock(struct etr_aib *aib, int port) +static int etr_sync_clock(void *data) { - struct etr_aib *sync_port; - struct clock_sync_data etr_sync; + static int first; unsigned long long clock, old_clock, delay, delta; - int follows; + struct clock_sync_data *etr_sync; + struct etr_aib *sync_port, *aib; + int port; int rc; - /* Check if the current aib is adjacent to the sync port aib. */ - sync_port = (port == 0) ? &etr_port0 : &etr_port1; - follows = etr_aib_follows(sync_port, aib, port); - memcpy(sync_port, aib, sizeof(*aib)); - if (!follows) - return -EAGAIN; + etr_sync = data; - /* - * Catch all other cpus and make them wait until we have - * successfully synced the clock. smp_call_function will - * return after all other cpus are in etr_sync_cpu_start. - */ - memset(&etr_sync, 0, sizeof(etr_sync)); - preempt_disable(); - smp_call_function(clock_sync_cpu_start, &etr_sync, 0); - local_irq_disable(); + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(etr_sync); + return 0; + } + + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&etr_sync->cpus) != 0) + cpu_relax(); + + port = etr_sync->etr_port; + aib = etr_sync->etr_aib; + sync_port = (port == 0) ? &etr_port0 : &etr_port1; enable_sync_clock(); /* Set clock to next OTE. */ @@ -724,16 +767,16 @@ static int etr_sync_clock(struct etr_aib *aib, int port) delay = (unsigned long long) (aib->edf2.etv - sync_port->edf2.etv) << 32; delta = adjust_time(old_clock, clock, delay); - etr_sync.fixup_cc = delta; + etr_sync->fixup_cc = delta; fixup_clock_comparator(delta); /* Verify that the clock is properly set. */ if (!etr_aib_follows(sync_port, aib, port)) { /* Didn't work. */ disable_sync_clock(NULL); - etr_sync.in_sync = -EAGAIN; + etr_sync->in_sync = -EAGAIN; rc = -EAGAIN; } else { - etr_sync.in_sync = 1; + etr_sync->in_sync = 1; rc = 0; } } else { @@ -741,12 +784,33 @@ static int etr_sync_clock(struct etr_aib *aib, int port) __ctl_clear_bit(0, 29); __ctl_clear_bit(14, 21); disable_sync_clock(NULL); - etr_sync.in_sync = -EAGAIN; + etr_sync->in_sync = -EAGAIN; rc = -EAGAIN; } - local_irq_enable(); - smp_call_function(clock_sync_cpu_end, NULL, 0); - preempt_enable(); + xchg(&first, 0); + return rc; +} + +static int etr_sync_clock_stop(struct etr_aib *aib, int port) +{ + struct clock_sync_data etr_sync; + struct etr_aib *sync_port; + int follows; + int rc; + + /* Check if the current aib is adjacent to the sync port aib. */ + sync_port = (port == 0) ? &etr_port0 : &etr_port1; + follows = etr_aib_follows(sync_port, aib, port); + memcpy(sync_port, aib, sizeof(*aib)); + if (!follows) + return -EAGAIN; + memset(&etr_sync, 0, sizeof(etr_sync)); + etr_sync.etr_aib = aib; + etr_sync.etr_port = port; + get_online_cpus(); + atomic_set(&etr_sync.cpus, num_online_cpus() - 1); + rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map); + put_online_cpus(); return rc; } @@ -903,7 +967,7 @@ static void etr_update_eacr(struct etr_eacr eacr) } /* - * ETR tasklet. In this function you'll find the main logic. In + * ETR work. In this function you'll find the main logic. In * particular this is the only function that calls etr_update_eacr(), * it "controls" the etr control register. */ @@ -914,6 +978,9 @@ static void etr_work_fn(struct work_struct *work) struct etr_aib aib; int sync_port; + /* prevent multiple execution. */ + mutex_lock(&etr_work_mutex); + /* Create working copy of etr_eacr. */ eacr = etr_eacr; @@ -929,7 +996,7 @@ static void etr_work_fn(struct work_struct *work) del_timer_sync(&etr_timer); etr_update_eacr(eacr); clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - return; + goto out_unlock; } /* Store aib to get the current ETR status word. */ @@ -1016,7 +1083,7 @@ static void etr_work_fn(struct work_struct *work) eacr.es || sync_port < 0) { etr_update_eacr(eacr); etr_set_tolec_timeout(now); - return; + goto out_unlock; } /* @@ -1036,7 +1103,7 @@ static void etr_work_fn(struct work_struct *work) etr_update_eacr(eacr); set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); if (now < etr_tolec + (1600000 << 12) || - etr_sync_clock(&aib, sync_port) != 0) { + etr_sync_clock_stop(&aib, sync_port) != 0) { /* Sync failed. Try again in 1/2 second. */ eacr.es = 0; etr_update_eacr(eacr); @@ -1044,6 +1111,8 @@ static void etr_work_fn(struct work_struct *work) etr_set_sync_timeout(); } else etr_set_tolec_timeout(now); +out_unlock: + mutex_unlock(&etr_work_mutex); } /* @@ -1125,13 +1194,13 @@ static ssize_t etr_online_store(struct sys_device *dev, return count; /* Nothing to do. */ etr_port0_online = value; set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } else { if (etr_port1_online == value) return count; /* Nothing to do. */ etr_port1_online = value; set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } return count; } @@ -1332,6 +1401,7 @@ static struct stp_sstpi stp_info; static void *stp_page; static void stp_work_fn(struct work_struct *work); +static DEFINE_MUTEX(stp_work_mutex); static DECLARE_WORK(stp_work, stp_work_fn); static int __init early_parse_stp(char *p) @@ -1356,7 +1426,8 @@ static void __init stp_reset(void) if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - printk(KERN_WARNING "Running on non STP capable machine.\n"); + pr_warning("The real or virtual hardware system does " + "not provide an STP interface\n"); free_bootmem((unsigned long) stp_page, PAGE_SIZE); stp_page = NULL; stp_online = 0; @@ -1365,8 +1436,12 @@ static void __init stp_reset(void) static int __init stp_init(void) { - if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online) - schedule_work(&stp_work); + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return 0; + time_init_wq(); + if (!stp_online) + return 0; + queue_work(time_sync_wq, &stp_work); return 0; } @@ -1383,7 +1458,7 @@ arch_initcall(stp_init); static void stp_timing_alert(struct stp_irq_parm *intparm) { if (intparm->tsc || intparm->lac || intparm->tcpc) - schedule_work(&stp_work); + queue_work(time_sync_wq, &stp_work); } /* @@ -1397,7 +1472,7 @@ void stp_sync_check(void) if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return; disable_sync_clock(NULL); - schedule_work(&stp_work); + queue_work(time_sync_wq, &stp_work); } /* @@ -1411,46 +1486,34 @@ void stp_island_check(void) if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) return; disable_sync_clock(NULL); - schedule_work(&stp_work); + queue_work(time_sync_wq, &stp_work); } -/* - * STP tasklet. Check for the STP state and take over the clock - * synchronization if the STP clock source is usable. - */ -static void stp_work_fn(struct work_struct *work) + +static int stp_sync_clock(void *data) { - struct clock_sync_data stp_sync; + static int first; unsigned long long old_clock, delta; + struct clock_sync_data *stp_sync; int rc; - if (!stp_online) { - chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); - return; - } + stp_sync = data; - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); - if (rc) - return; + if (xchg(&first, 1) == 1) { + /* Slave */ + clock_sync_cpu(stp_sync); + return 0; + } - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); - if (rc || stp_info.c == 0) - return; + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&stp_sync->cpus) != 0) + cpu_relax(); - /* - * Catch all other cpus and make them wait until we have - * successfully synced the clock. smp_call_function will - * return after all other cpus are in clock_sync_cpu_start. - */ - memset(&stp_sync, 0, sizeof(stp_sync)); - preempt_disable(); - smp_call_function(clock_sync_cpu_start, &stp_sync, 0); - local_irq_disable(); enable_sync_clock(); set_bit(CLOCK_SYNC_STP, &clock_sync_flags); if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags)) - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); rc = 0; if (stp_info.todoff[0] || stp_info.todoff[1] || @@ -1469,16 +1532,49 @@ static void stp_work_fn(struct work_struct *work) } if (rc) { disable_sync_clock(NULL); - stp_sync.in_sync = -EAGAIN; + stp_sync->in_sync = -EAGAIN; clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); if (etr_port0_online || etr_port1_online) - schedule_work(&etr_work); + queue_work(time_sync_wq, &etr_work); } else - stp_sync.in_sync = 1; + stp_sync->in_sync = 1; + xchg(&first, 0); + return 0; +} + +/* + * STP work. Check for the STP state and take over the clock + * synchronization if the STP clock source is usable. + */ +static void stp_work_fn(struct work_struct *work) +{ + struct clock_sync_data stp_sync; + int rc; + + /* prevent multiple execution. */ + mutex_lock(&stp_work_mutex); + + if (!stp_online) { + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + goto out_unlock; + } + + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + if (rc) + goto out_unlock; + + rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + if (rc || stp_info.c == 0) + goto out_unlock; + + memset(&stp_sync, 0, sizeof(stp_sync)); + get_online_cpus(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map); + put_online_cpus(); - local_irq_enable(); - smp_call_function(clock_sync_cpu_end, NULL, 0); - preempt_enable(); +out_unlock: + mutex_unlock(&stp_work_mutex); } /* @@ -1587,7 +1683,7 @@ static ssize_t stp_online_store(struct sysdev_class *class, if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return -EOPNOTSUPP; stp_online = value; - schedule_work(&stp_work); + queue_work(time_sync_wq, &stp_work); return count; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index a947899dcba..cc362c9ea8f 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -3,6 +3,9 @@ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/kernel.h> #include <linux/mm.h> #include <linux/init.h> @@ -12,6 +15,7 @@ #include <linux/workqueue.h> #include <linux/cpu.h> #include <linux/smp.h> +#include <linux/cpuset.h> #include <asm/delay.h> #include <asm/s390_ext.h> #include <asm/sysinfo.h> @@ -57,11 +61,11 @@ struct core_info { cpumask_t mask; }; +static int topology_enabled; static void topology_work_fn(struct work_struct *work); static struct tl_info *tl_info; static struct core_info core_info; static int machine_has_topology; -static int machine_has_topology_irq; static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); @@ -77,8 +81,8 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) cpumask_t mask; cpus_clear(mask); - if (!machine_has_topology) - return cpu_present_map; + if (!topology_enabled || !machine_has_topology) + return cpu_possible_map; spin_lock_irqsave(&topology_lock, flags); while (core) { if (cpu_isset(cpu, core->mask)) { @@ -93,6 +97,11 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) return mask; } +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) { unsigned int cpu; @@ -168,7 +177,7 @@ static void topology_update_polarization_simple(void) int cpu; mutex_lock(&smp_cpu_state_mutex); - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) smp_cpu_polarization[cpu] = POLARIZATION_HRZ; mutex_unlock(&smp_cpu_state_mutex); } @@ -199,7 +208,7 @@ int topology_set_cpu_management(int fc) rc = ptf(PTF_HORIZONTAL); if (rc) return -EBUSY; - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN; return rc; } @@ -208,11 +217,11 @@ static void update_cpu_core_map(void) { int cpu; - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) cpu_core_map[cpu] = cpu_coregroup_map(cpu); } -void arch_update_cpu_topology(void) +int arch_update_cpu_topology(void) { struct tl_info *info = tl_info; struct sys_device *sysdev; @@ -221,7 +230,7 @@ void arch_update_cpu_topology(void) if (!machine_has_topology) { update_cpu_core_map(); topology_update_polarization_simple(); - return; + return 0; } stsi(info, 15, 1, 2); tl_to_cores(info); @@ -230,11 +239,12 @@ void arch_update_cpu_topology(void) sysdev = get_cpu_sysdev(cpu); kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); } + return 1; } static void topology_work_fn(struct work_struct *work) { - arch_reinit_sched_domains(); + rebuild_sched_domains(); } void topology_schedule_update(void) @@ -257,10 +267,14 @@ static void set_topology_timer(void) add_timer(&topology_timer); } -static void topology_interrupt(__u16 code) +static int __init early_parse_topology(char *p) { - schedule_work(&topology_work); + if (strncmp(p, "on", 2)) + return 0; + topology_enabled = 1; + return 0; } +early_param("topology", early_parse_topology); static int __init init_topology_update(void) { @@ -272,14 +286,7 @@ static int __init init_topology_update(void) goto out; } init_timer_deferrable(&topology_timer); - if (machine_has_topology_irq) { - rc = register_external_interrupt(0x2005, topology_interrupt); - if (rc) - goto out; - ctl_set_bit(0, 8); - } - else - set_topology_timer(); + set_topology_timer(); out: update_cpu_core_map(); return rc; @@ -300,9 +307,6 @@ void __init s390_init_cpu_topology(void) return; machine_has_topology = 1; - if (facility_bits & (1ULL << 51)) - machine_has_topology_irq = 1; - tl_info = alloc_bootmem_pages(PAGE_SIZE); info = tl_info; stsi(info, 15, 1, 2); @@ -311,7 +315,7 @@ void __init s390_init_cpu_topology(void) for (i = 0; i < info->mnest - 2; i++) nr_cores *= info->mag[NR_MAG - 3 - i]; - printk(KERN_INFO "CPU topology:"); + pr_info("The CPU configuration topology of the machine is:"); for (i = 0; i < NR_MAG; i++) printk(" %d", info->mag[i]); printk(" / %d\n", info->mnest); @@ -326,5 +330,4 @@ void __init s390_init_cpu_topology(void) return; error: machine_has_topology = 0; - machine_has_topology_irq = 0; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c new file mode 100644 index 00000000000..25a6a82f1c0 --- /dev/null +++ b/arch/s390/kernel/vdso.c @@ -0,0 +1,351 @@ +/* + * vdso setup for s390 + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/security.h> +#include <linux/bootmem.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/vdso.h> + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) +extern char vdso32_start, vdso32_end; +static void *vdso32_kbase = &vdso32_start; +static unsigned int vdso32_pages; +static struct page **vdso32_pagelist; +#endif + +#ifdef CONFIG_64BIT +extern char vdso64_start, vdso64_end; +static void *vdso64_kbase = &vdso64_start; +static unsigned int vdso64_pages; +static struct page **vdso64_pagelist; +#endif /* CONFIG_64BIT */ + +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? + */ +unsigned int __read_mostly vdso_enabled = 1; + +static int __init vdso_setup(char *s) +{ + vdso_enabled = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("vdso=", vdso_setup); + +/* + * The vdso data page + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __attribute__((__section__(".data.page_aligned"))); +struct vdso_data *vdso_data = &vdso_data_store.data; + +/* + * Setup vdso data page. + */ +static void vdso_init_data(struct vdso_data *vd) +{ + unsigned int facility_list; + + facility_list = stfl(); + vd->ectg_available = switch_amode && (facility_list & 1); +} + +#ifdef CONFIG_64BIT +/* + * Setup per cpu vdso data page. + */ +static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) +{ +} + +/* + * Allocate/free per cpu vdso data. + */ +#ifdef CONFIG_64BIT +#define SEGMENT_ORDER 2 +#else +#define SEGMENT_ORDER 1 +#endif + +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (!switch_amode || !vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_RO + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x20000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +#ifdef CONFIG_HOTPLUG_CPU +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (!switch_amode || !vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __vdso_init_cr5(void *dummy) +{ + unsigned long cr5; + + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} + +static void vdso_init_cr5(void) +{ + if (switch_amode && vdso_enabled) + on_each_cpu(__vdso_init_cr5, NULL, 1); +} +#endif /* CONFIG_64BIT */ + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + struct page **vdso_pagelist; + unsigned long vdso_pages; + unsigned long vdso_base; + int rc; + + if (!vdso_enabled) + return 0; + /* + * Only map the vdso for dynamically linked elf binaries. + */ + if (!uses_interp) + return 0; + + vdso_base = mm->mmap_base; +#ifdef CONFIG_64BIT + vdso_pagelist = vdso64_pagelist; + vdso_pages = vdso64_pages; +#ifdef CONFIG_COMPAT + if (test_thread_flag(TIF_31BIT)) { + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; + } +#endif +#else + vdso_pagelist = vdso32_pagelist; + vdso_pages = vdso32_pages; +#endif + + /* + * vDSO has a problem and was disabled, just don't "enable" it for + * the process + */ + if (vdso_pages == 0) + return 0; + + current->mm->context.vdso_base = 0; + + /* + * pick a base address for the vDSO in process space. We try to put + * it at vdso_base which is the "natural" base for it, but we might + * fail and end up putting it elsewhere. + */ + down_write(&mm->mmap_sem); + vdso_base = get_unmapped_area(NULL, vdso_base, + vdso_pages << PAGE_SHIFT, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + rc = vdso_base; + goto out_up; + } + + /* + * our vma flags don't have VM_WRITE so by default, the process + * isn't allowed to write those pages. + * gdb can break that with ptrace interface, and thus trigger COW + * on those pages but it's then your responsibility to never do that + * on the "data" page of the vDSO or you'll stop getting kernel + * updates and your nice userland gettimeofday will be totally dead. + * It's fine to use that for setting breakpoints in the vDSO code + * pages though + * + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + vdso_pagelist); + if (rc) + goto out_up; + + /* Put vDSO base into mm struct */ + current->mm->context.vdso_base = vdso_base; + + up_write(&mm->mmap_sem); + return 0; + +out_up: + up_write(&mm->mmap_sem); + return rc; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) + return "[vdso]"; + return NULL; +} + +static int __init vdso_init(void) +{ + int i; + + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) + /* Calculate the size of the 32 bit vDSO */ + vdso32_pages = ((&vdso32_end - &vdso32_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1), + GFP_KERNEL); + BUG_ON(vdso32_pagelist == NULL); + for (i = 0; i < vdso32_pages - 1; i++) { + struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso32_pagelist[i] = pg; + } + vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); + vdso32_pagelist[vdso32_pages] = NULL; +#endif + +#ifdef CONFIG_64BIT + /* Calculate the size of the 64 bit vDSO */ + vdso64_pages = ((&vdso64_end - &vdso64_start + + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; + + /* Make sure pages are in the correct state */ + vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1), + GFP_KERNEL); + BUG_ON(vdso64_pagelist == NULL); + for (i = 0; i < vdso64_pages - 1; i++) { + struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); + ClearPageReserved(pg); + get_page(pg); + vdso64_pagelist[i] = pg; + } + vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); + vdso64_pagelist[vdso64_pages] = NULL; +#ifndef CONFIG_SMP + BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore)); +#endif + vdso_init_cr5(); +#endif /* CONFIG_64BIT */ + + get_page(virt_to_page(vdso_data)); + + smp_wmb(); + + return 0; +} +arch_initcall(vdso_init); + +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} + +int in_gate_area(struct task_struct *task, unsigned long addr) +{ + return 0; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +{ + return NULL; +} diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile new file mode 100644 index 00000000000..ca78ad60ba2 --- /dev/null +++ b/arch/s390/kernel/vdso32/Makefile @@ -0,0 +1,55 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_31 += -m31 -s + +KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) + $(call if_changed,vdso32ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso32.so: $(obj)/vdso32.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S new file mode 100644 index 00000000000..9532c4e6a9d --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -0,0 +1,39 @@ +/* + * Userland implementation of clock_getres() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + chi %r2,CLOCK_REALTIME + je 0f + chi %r2,CLOCK_MONOTONIC + jne 3f +0: ltr %r3,%r3 + jz 2f /* res == NULL */ + basr %r1,0 +1: l %r0,4f-1b(%r1) + xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ + st %r0,4(%r3) /* store tp->tv_usec */ +2: lhi %r2,0 + br %r14 +3: lhi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +4: .long CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S new file mode 100644 index 00000000000..4a98909a831 --- /dev/null +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -0,0 +1,128 @@ +/* + * Userland implementation of clock_gettime() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + basr %r5,0 +0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,CLOCK_REALTIME + je 10f + chi %r2,CLOCK_MONOTONIC + jne 19f + + /* CLOCK_MONOTONIC */ + ltr %r3,%r3 + jz 9f /* tp == NULL */ +1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,2f + ahi %r0,-1 +2: mhi %r0,1000 /* cyc2ns(clock,cycle_delta) */ + lr %r2,%r0 + lhi %r0,1000 + ltr %r1,%r1 + mr %r0,%r0 + jnm 3f + ahi %r0,1000 +3: alr %r0,%r2 + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,4f + ahi %r0,1 +4: l %r2,__VDSO_XTIME_SEC+4(%r5) + al %r0,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ + al %r1,__VDSO_WTOM_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: al %r2,__VDSO_WTOM_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,20f-6b(%r5) + jl 8f +7: ahi %r2,1 + sl %r1,20f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +9: lhi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +10: ltr %r3,%r3 /* tp == NULL */ + jz 18f +11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 11b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,12f + ahi %r0,-1 +12: mhi %r0,1000 /* cyc2ns(clock,cycle_delta) */ + lr %r2,%r0 + lhi %r0,1000 + ltr %r1,%r1 + mr %r0,%r0 + jnm 13f + ahi %r0,1000 +13: alr %r0,%r2 + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,14f + ahi %r0,1 +14: l %r2,__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 11b + basr %r5,0 +15: ltr %r0,%r0 + jnz 16f + cl %r1,20f-15b(%r5) + jl 17f +16: ahi %r2,1 + sl %r1,20f-15b(%r5) + brc 3,15b + ahi %r0,-1 + j 15b +17: st %r2,0(%r3) /* store tp->tv_sec */ + st %r1,4(%r3) /* store tp->tv_nsec */ +18: lhi %r2,0 + br %r14 + + /* Fallback to system call */ +19: lhi %r1,__NR_clock_gettime + svc 0 + br %r14 + +20: .long 1000000000 +21: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S new file mode 100644 index 00000000000..c32f29c3d70 --- /dev/null +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -0,0 +1,82 @@ +/* + * Userland implementation of gettimeofday() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + basr %r5,0 +0: al %r5,13f-0b(%r5) /* get &_vdso_data */ +1: ltr %r3,%r3 /* check if tz is NULL */ + je 2f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +2: ltr %r2,%r2 /* check if tv is NULL */ + je 10f + l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 1b + stck 24(%r15) /* Store TOD clock */ + lm %r0,%r1,24(%r15) + s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + sl %r1,__VDSO_XTIME_STAMP+4(%r5) + brc 3,3f + ahi %r0,-1 +3: mhi %r0,1000 /* cyc2ns(clock,cycle_delta) */ + st %r0,24(%r15) + lhi %r0,1000 + ltr %r1,%r1 + mr %r0,%r0 + jnm 4f + ahi %r0,1000 +4: al %r0,24(%r15) + srdl %r0,12 + al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + al %r1,__VDSO_XTIME_NSEC+4(%r5) + brc 12,5f + ahi %r0,1 +5: mvc 24(4,%r15),__VDSO_XTIME_SEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 1b + l %r4,24(%r15) /* get tv_sec from stack */ + basr %r5,0 +6: ltr %r0,%r0 + jnz 7f + cl %r1,11f-6b(%r5) + jl 8f +7: ahi %r4,1 + sl %r1,11f-6b(%r5) + brc 3,6b + ahi %r0,-1 + j 6b +8: st %r4,0(%r2) /* store tv->tv_sec */ + ltr %r1,%r1 + m %r0,12f-6b(%r5) + jnm 9f + al %r0,12f-6b(%r5) +9: srl %r0,6 + st %r0,4(%r2) /* store tv->tv_usec */ +10: slr %r2,%r2 + br %r14 +11: .long 1000000000 +12: .long 274877907 +13: .long _vdso_data - 0b + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S new file mode 100644 index 00000000000..79a071e4357 --- /dev/null +++ b/arch/s390/kernel/vdso32/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S new file mode 100644 index 00000000000..a8c379fa124 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 32 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO32_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 00000000000..61639a89e70 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,13 @@ +#include <linux/init.h> +#include <asm/page.h> + + .section ".data.page_aligned" + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/s390/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile new file mode 100644 index 00000000000..6fc8e829258 --- /dev/null +++ b/arch/s390/kernel/vdso64/Makefile @@ -0,0 +1,55 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + +KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_64 += -m64 -s + +KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin +KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) + $(call if_changed,vdso64ld) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso64.so: $(obj)/vdso64.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso64.so diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S new file mode 100644 index 00000000000..9ce8caafdb4 --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -0,0 +1,44 @@ +/* + * Userland implementation of clock_getres() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_getres + .type __kernel_clock_getres,@function +__kernel_clock_getres: + .cfi_startproc + cghi %r2,CLOCK_REALTIME + je 0f + cghi %r2,CLOCK_MONOTONIC + je 0f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) + jz 2f +0: ltgr %r3,%r3 + jz 1f /* res == NULL */ + larl %r1,3f + lg %r0,0(%r1) + xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ + stg %r0,8(%r3) /* store tp->tv_usec */ +1: lghi %r2,0 + br %r14 +2: lghi %r1,__NR_clock_getres /* fallback to svc */ + svc 0 + br %r14 +3: .quad CLOCK_REALTIME_RES + .cfi_endproc + .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S new file mode 100644 index 00000000000..79dbfee831e --- /dev/null +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -0,0 +1,118 @@ +/* + * Userland implementation of clock_gettime() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_clock_gettime + .type __kernel_clock_gettime,@function +__kernel_clock_gettime: + .cfi_startproc + larl %r5,_vdso_data + cghi %r2,CLOCK_REALTIME + je 4f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + je 9f + cghi %r2,CLOCK_MONOTONIC + jne 12f + + /* CLOCK_MONOTONIC */ + ltgr %r3,%r3 + jz 3f /* tp == NULL */ +0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + mghi %r1,1000 + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + lg %r0,__VDSO_XTIME_SEC(%r5) + alg %r1,__VDSO_WTOM_NSEC(%r5) /* + wall_to_monotonic */ + alg %r0,__VDSO_WTOM_SEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,13f +1: clg %r1,0(%r5) + jl 2f + slg %r1,0(%r5) + aghi %r0,1 + j 1b +2: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +3: lghi %r2,0 + br %r14 + + /* CLOCK_REALTIME */ +4: ltr %r3,%r3 /* tp == NULL */ + jz 8f +5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 5b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + mghi %r1,1000 + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime */ + lg %r0,__VDSO_XTIME_SEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 5b + larl %r5,13f +6: clg %r1,0(%r5) + jl 7f + slg %r1,0(%r5) + aghi %r0,1 + j 6b +7: stg %r0,0(%r3) /* store tp->tv_sec */ + stg %r1,8(%r3) /* store tp->tv_nsec */ +8: lghi %r2,0 + br %r14 + + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + sacf 0 + sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg %r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + + /* Fallback to system call */ +12: lghi %r1,__NR_clock_gettime + svc 0 + br %r14 + +13: .quad 1000000000 +14: .quad 19342813113834067 + .cfi_endproc + .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S new file mode 100644 index 00000000000..f873e75634e --- /dev/null +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -0,0 +1,56 @@ +/* + * Userland implementation of gettimeofday() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + + .text + .align 4 + .globl __kernel_gettimeofday + .type __kernel_gettimeofday,@function +__kernel_gettimeofday: + .cfi_startproc + larl %r5,_vdso_data +0: ltgr %r3,%r3 /* check if tz is NULL */ + je 1f + mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) +1: ltgr %r2,%r2 /* check if tv is NULL */ + je 4f + lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 0b + stck 48(%r15) /* Store TOD clock */ + lg %r1,48(%r15) + sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ + mghi %r1,1000 + srlg %r1,%r1,12 /* cyc2ns(clock,cycle_delta) */ + alg %r1,__VDSO_XTIME_NSEC(%r5) /* + xtime.tv_nsec */ + lg %r0,__VDSO_XTIME_SEC(%r5) /* xtime.tv_sec */ + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 0b + larl %r5,5f +2: clg %r1,0(%r5) + jl 3f + slg %r1,0(%r5) + aghi %r0,1 + j 2b +3: stg %r0,0(%r2) /* store tv->tv_sec */ + slgr %r0,%r0 /* tv_nsec -> tv_usec */ + ml %r0,8(%r5) + srlg %r0,%r0,6 + stg %r0,8(%r2) /* store tv->tv_usec */ +4: lghi %r2,0 + br %r14 +5: .quad 1000000000 + .long 274877907 + .cfi_endproc + .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S new file mode 100644 index 00000000000..79a071e4357 --- /dev/null +++ b/arch/s390/kernel/vdso64/note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S new file mode 100644 index 00000000000..9f5979d102a --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,138 @@ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(_start) + +SECTIONS +{ + . = VDSO64_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + . = ALIGN(4096); + PROVIDE(_vdso_data = .); + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 00000000000..d8e2ac14d56 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,13 @@ +#include <linux/init.h> +#include <asm/page.h> + + .section ".data.page_aligned" + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/s390/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 0fa5dc5d68e..2fb36e46219 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -23,20 +23,43 @@ #include <asm/s390_ext.h> #include <asm/timer.h> #include <asm/irq_regs.h> +#include <asm/cpu.h> static ext_int_info_t ext_int_info_timer; + static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { + .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) +}; + +static inline __u64 get_vtimer(void) +{ + __u64 timer; + + asm volatile("STPT %0" : "=m" (timer)); + return timer; +} + +static inline void set_vtimer(__u64 expires) +{ + __u64 timer; + + asm volatile (" STPT %0\n" /* Store current cpu timer value */ + " SPT %1" /* Set new value immediatly afterwards */ + : "=m" (timer) : "m" (expires) ); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; + S390_lowcore.last_update_timer = expires; +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_process_tick(struct task_struct *tsk, int user_tick) +static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) { - cputime_t cputime; - __u64 timer, clock; - int rcu_user_flag; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, clock, user, system, steal; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -45,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick) : "=m" (S390_lowcore.last_update_timer), "=m" (S390_lowcore.last_update_clock) ); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; - - cputime = S390_lowcore.user_timer >> 12; - rcu_user_flag = cputime != 0; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); - - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, HARDIRQ_OFFSET, cputime); - - cputime = S390_lowcore.steal_clock; - if ((__s64) cputime > 0) { - cputime >>= 12; - S390_lowcore.steal_clock -= cputime << 12; - account_steal_time(tsk, cputime); + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; + + user = S390_lowcore.user_timer - ti->user_timer; + S390_lowcore.steal_timer -= user; + ti->user_timer = S390_lowcore.user_timer; + account_user_time(tsk, user, user); + + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, hardirq_offset, system, system); + + steal = S390_lowcore.steal_timer; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; + account_steal_time(steal); } } -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ -void account_vtime(struct task_struct *tsk) +void account_vtime(struct task_struct *prev, struct task_struct *next) { - cputime_t cputime; - __u64 timer; - - timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - - cputime = S390_lowcore.user_timer >> 12; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); + struct thread_info *ti; + + do_account_vtime(prev, 0); + ti = task_thread_info(prev); + ti->user_timer = S390_lowcore.user_timer; + ti->system_timer = S390_lowcore.system_timer; + ti = task_thread_info(next); + S390_lowcore.user_timer = ti->user_timer; + S390_lowcore.system_timer = ti->system_timer; +} - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); +void account_process_tick(struct task_struct *tsk, int user_tick) +{ + do_account_vtime(tsk, HARDIRQ_OFFSET); } /* @@ -97,90 +111,131 @@ void account_vtime(struct task_struct *tsk) */ void account_system_vtime(struct task_struct *tsk) { - cputime_t cputime; - __u64 timer; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, system; timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); + S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, 0, system, system); } EXPORT_SYMBOL_GPL(account_system_vtime); -static inline void set_vtimer(__u64 expires) -{ - __u64 timer; - - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ - : "=m" (timer) : "m" (expires) ); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; - - /* store expire time for this CPU timer */ - __get_cpu_var(virt_cpu_timer).to_expire = expires; -} -#else -static inline void set_vtimer(__u64 expires) -{ - S390_lowcore.last_update_timer = expires; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); - - /* store expire time for this CPU timer */ - __get_cpu_var(virt_cpu_timer).to_expire = expires; -} -#endif - -void vtime_start_cpu_timer(void) +void vtime_start_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* CPU timer interrupt is pending, don't reprogramm it */ - if (vt_list->idle & 1LL<<63) - return; + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + __u64 idle_time, expires; + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle_time = S390_lowcore.int_clock - idle->idle_enter; + account_idle_time(idle_time); + S390_lowcore.last_update_clock = S390_lowcore.int_clock; + + /* Account system time spent going idle. */ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; + S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer; + + /* Restart vtime CPU timer */ + if (vq->do_spt) { + /* Program old expire value but first save progress. */ + expires = vq->idle - S390_lowcore.async_enter_timer; + expires += get_vtimer(); + set_vtimer(expires); + } else { + /* Don't account the CPU timer delta while the cpu was idle. */ + vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer; + } - if (!list_empty(&vt_list->list)) - set_vtimer(vt_list->idle); + spin_lock(&idle->lock); + idle->idle_time += idle_time; + idle->idle_enter = 0ULL; + idle->idle_count++; + spin_unlock(&idle->lock); } -void vtime_stop_cpu_timer(void) +void vtime_stop_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* nothing to do */ - if (list_empty(&vt_list->list)) { - vt_list->idle = VTIMER_MAX_SLICE; - goto fire; + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + psw_t psw; + + /* Wait for external, I/O or machine check interrupt. */ + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + + /* Check if the CPU timer needs to be reprogrammed. */ + if (vq->do_spt) { + __u64 vmax = VTIMER_MAX_SLICE; + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * set_cpu_timer(VTIMER_MAX_SLICE); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " spt 0(%5)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) + : "memory", "cc", "1"); + } else { + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "m" (psw) + : "memory", "cc", "1"); } - - /* store the actual expire value */ - asm volatile ("STPT %0" : "=m" (vt_list->idle)); - - /* - * If the CPU timer is negative we don't reprogramm - * it because we will get instantly an interrupt. - */ - if (vt_list->idle & 1LL<<63) - return; - - vt_list->offset += vt_list->to_expire - vt_list->idle; - - /* - * We cannot halt the CPU timer, we just write a value that - * nearly never expires (only after 71 years) and re-write - * the stored expire value if we continue the timer - */ - fire: - set_vtimer(VTIMER_MAX_SLICE); } /* @@ -206,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) */ static void do_callbacks(struct list_head *cb_list) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - void (*fn)(unsigned long); - unsigned long data; if (list_empty(cb_list)) return; - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); list_for_each_entry_safe(event, tmp, cb_list, entry) { - fn = event->function; - data = event->data; - fn(data); - - if (!event->interval) - /* delete one shot timer */ - list_del_init(&event->entry); - else { - /* move interval timer back to list */ - spin_lock(&vt_list->lock); - list_del_init(&event->entry); - list_add_sorted(event, &vt_list->list); - spin_unlock(&vt_list->lock); + list_del_init(&event->entry); + (event->function)(event->data); + if (event->interval) { + /* Recharge interval timer */ + event->expires = event->interval + vq->elapsed; + spin_lock(&vq->lock); + list_add_sorted(event, &vq->list); + spin_unlock(&vq->lock); } } } @@ -239,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list) */ static void do_cpu_timer_interrupt(__u16 error_code) { - __u64 next, delta; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - struct list_head *ptr; - /* the callback queue */ - struct list_head cb_list; + struct list_head cb_list; /* the callback queue */ + __u64 elapsed, next; INIT_LIST_HEAD(&cb_list); - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); /* walk timer list, fire all expired events */ - spin_lock(&vt_list->lock); - - if (vt_list->to_expire < VTIMER_MAX_SLICE) - vt_list->offset += vt_list->to_expire; - - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { - if (event->expires > vt_list->offset) - /* found first unexpired event, leave */ - break; - - /* re-charge interval timer, we have to add the offset */ - if (event->interval) - event->expires = event->interval + vt_list->offset; - - /* move expired timer to the callback queue */ - list_move_tail(&event->entry, &cb_list); + spin_lock(&vq->lock); + + elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer); + BUG_ON((s64) elapsed < 0); + vq->elapsed = 0; + list_for_each_entry_safe(event, tmp, &vq->list, entry) { + if (event->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&event->entry, &cb_list); + else + event->expires -= elapsed; } - spin_unlock(&vt_list->lock); + spin_unlock(&vq->lock); + + vq->do_spt = list_empty(&cb_list); do_callbacks(&cb_list); /* next event is first in list */ - spin_lock(&vt_list->lock); - if (!list_empty(&vt_list->list)) { - ptr = vt_list->list.next; - event = list_entry(ptr, struct vtimer_list, entry); - next = event->expires - vt_list->offset; - - /* add the expired time from this interrupt handler - * and the callback functions - */ - asm volatile ("STPT %0" : "=m" (delta)); - delta = 0xffffffffffffffffLL - delta + 1; - vt_list->offset += delta; - next -= delta; - } else { - vt_list->offset = 0; - next = VTIMER_MAX_SLICE; - } - spin_unlock(&vt_list->lock); - set_vtimer(next); + next = VTIMER_MAX_SLICE; + spin_lock(&vq->lock); + if (!list_empty(&vq->list)) { + event = list_first_entry(&vq->list, struct vtimer_list, entry); + next = event->expires; + } else + vq->do_spt = 0; + spin_unlock(&vq->lock); + /* + * To improve precision add the time spent by the + * interrupt handler to the elapsed time. + * Note: CPU timer counts down and we got an interrupt, + * the current content is negative + */ + elapsed = S390_lowcore.async_enter_timer - get_vtimer(); + set_vtimer(next - elapsed); + vq->timer = next - elapsed; + vq->elapsed = elapsed; } void init_virt_timer(struct vtimer_list *timer) { timer->function = NULL; INIT_LIST_HEAD(&timer->entry); - spin_lock_init(&timer->lock); } EXPORT_SYMBOL(init_virt_timer); @@ -310,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer) */ static void internal_add_vtimer(struct vtimer_list *timer) { + struct vtimer_queue *vq; unsigned long flags; - __u64 done; - struct vtimer_list *event; - struct vtimer_queue *vt_list; + __u64 left, expires; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); BUG_ON(timer->cpu != smp_processor_id()); - /* if list is empty we only have to set the timer */ - if (list_empty(&vt_list->list)) { - /* reset the offset, this may happen if the last timer was - * just deleted by mod_virt_timer and the interrupt - * didn't happen until here - */ - vt_list->offset = 0; - goto fire; + if (list_empty(&vq->list)) { + /* First timer on this cpu, just program it. */ + list_add(&timer->entry, &vq->list); + set_vtimer(timer->expires); + vq->timer = timer->expires; + vq->elapsed = 0; + } else { + /* Check progress of old timers. */ + expires = timer->expires; + left = get_vtimer(); + if (likely((s64) expires < (s64) left)) { + /* The new timer expires before the current timer. */ + set_vtimer(expires); + vq->elapsed += vq->timer - left; + vq->timer = expires; + } else { + vq->elapsed += vq->timer - left; + vq->timer = left; + } + /* Insert new timer into per cpu list. */ + timer->expires += vq->elapsed; + list_add_sorted(timer, &vq->list); } - /* save progress */ - asm volatile ("STPT %0" : "=m" (done)); - - /* calculate completed work */ - done = vt_list->to_expire - done + vt_list->offset; - vt_list->offset = 0; - - list_for_each_entry(event, &vt_list->list, entry) - event->expires -= done; - - fire: - list_add_sorted(timer, &vt_list->list); - - /* get first element, which is the next vtimer slice */ - event = list_entry(vt_list->list.next, struct vtimer_list, entry); - - set_vtimer(event->expires); - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ put_cpu(); } @@ -392,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic); * If we change a pending timer the function must be called on the CPU * where the timer is running on, e.g. by smp_call_function_single() * - * The original mod_timer adds the timer if it is not pending. For compatibility - * we do the same. The timer will be added on the current CPU as a oneshot timer. + * The original mod_timer adds the timer if it is not pending. For + * compatibility we do the same. The timer will be added on the current + * CPU as a oneshot timer. * * returns whether it has modified a pending timer (1) or not (0) */ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; unsigned long flags; int cpu; @@ -415,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) return 1; cpu = get_cpu(); - vt_list = &per_cpu(virt_cpu_timer, cpu); + vq = &per_cpu(virt_cpu_timer, cpu); /* check if we run on the right CPU */ BUG_ON(timer->cpu != cpu); /* disable interrupts before test if timer is pending */ - spin_lock_irqsave(&vt_list->lock, flags); + spin_lock_irqsave(&vq->lock, flags); /* if timer isn't pending add it on the current CPU */ if (!vtimer_pending(timer)) { - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* we do not activate an interval timer with mod_virt_timer */ timer->interval = 0; timer->expires = expires; @@ -442,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) timer->interval = expires; /* the timer can't expire anymore so we can release the lock */ - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); internal_add_vtimer(timer); return 1; } @@ -456,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer); int del_virt_timer(struct vtimer_list *timer) { unsigned long flags; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* check if timer is pending */ if (!vtimer_pending(timer)) return 0; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); /* we don't interrupt a running timer, just let it expire! */ list_del_init(&timer->entry); - /* last timer removed */ - if (list_empty(&vt_list->list)) { - vt_list->to_expire = 0; - vt_list->offset = 0; - } - - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); return 1; } EXPORT_SYMBOL(del_virt_timer); @@ -484,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* kick the virtual timer */ - S390_lowcore.exit_timer = VTIMER_MAX_SLICE; - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); + asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer)); + + /* initialize per cpu vtimer structure */ + vq = &__get_cpu_var(virt_cpu_timer); + INIT_LIST_HEAD(&vq->list); + spin_lock_init(&vq->lock); /* enable cpu timer interrupts */ __ctl_set_bit(0,10); - - vt_list = &__get_cpu_var(virt_cpu_timer); - INIT_LIST_HEAD(&vt_list->list); - spin_lock_init(&vt_list->lock); - vt_list->to_expire = 0; - vt_list->offset = 0; - vt_list->idle = 0; - } void __init vtime_init(void) |