summaryrefslogtreecommitdiffstats
path: root/arch/s390/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/Makefile5
-rw-r--r--arch/s390/kernel/asm-offsets.c27
-rw-r--r--arch/s390/kernel/compat_signal.c6
-rw-r--r--arch/s390/kernel/crash_dump.c37
-rw-r--r--arch/s390/kernel/debug.c40
-rw-r--r--arch/s390/kernel/early.c22
-rw-r--r--arch/s390/kernel/entry.S159
-rw-r--r--arch/s390/kernel/entry.h17
-rw-r--r--arch/s390/kernel/entry64.S139
-rw-r--r--arch/s390/kernel/ipl.c99
-rw-r--r--arch/s390/kernel/irq.c46
-rw-r--r--arch/s390/kernel/lgr.c200
-rw-r--r--arch/s390/kernel/machine_kexec.c52
-rw-r--r--arch/s390/kernel/nmi.c2
-rw-r--r--arch/s390/kernel/os_info.c169
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c690
-rw-r--r--arch/s390/kernel/perf_event.c125
-rw-r--r--arch/s390/kernel/process.c11
-rw-r--r--arch/s390/kernel/setup.c61
-rw-r--r--arch/s390/kernel/signal.c6
-rw-r--r--arch/s390/kernel/smp.c1153
-rw-r--r--arch/s390/kernel/switch_cpu.S58
-rw-r--r--arch/s390/kernel/switch_cpu64.S51
-rw-r--r--arch/s390/kernel/swsusp_asm64.S19
-rw-r--r--arch/s390/kernel/time.c4
-rw-r--r--arch/s390/kernel/topology.c8
-rw-r--r--arch/s390/kernel/traps.c6
-rw-r--r--arch/s390/kernel/vdso.c38
-rw-r--r--arch/s390/kernel/vtime.c168
29 files changed, 2222 insertions, 1196 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7d9ec924e7e..884b18afc86 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o \
processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o \
debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o \
- sysinfo.o jump_label.o
+ sysinfo.o jump_label.o lgr.o os_info.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -34,8 +34,6 @@ extra-y += $(if $(CONFIG_64BIT),head64.o,head31.o)
obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCHED_BOOK) += topology.o
-obj-$(CONFIG_SMP) += $(if $(CONFIG_64BIT),switch_cpu64.o, \
- switch_cpu.o)
obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
@@ -50,6 +48,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
# Kexec part
S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 6e6a72e66d6..ed8c913db79 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -8,9 +8,11 @@
#include <linux/kbuild.h>
#include <linux/sched.h>
+#include <asm/cputime.h>
+#include <asm/timer.h>
#include <asm/vdso.h>
-#include <asm/sigp.h>
#include <asm/pgtable.h>
+#include <asm/system.h>
/*
* Make sure that the compiler is new enough. We want a compiler that
@@ -70,15 +72,15 @@ int main(void)
DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
BLANK();
- /* constants for SIGP */
- DEFINE(__SIGP_STOP, sigp_stop);
- DEFINE(__SIGP_RESTART, sigp_restart);
- DEFINE(__SIGP_SENSE, sigp_sense);
- DEFINE(__SIGP_INITIAL_CPU_RESET, sigp_initial_cpu_reset);
- BLANK();
+ /* idle data offsets */
+ DEFINE(__IDLE_ENTER, offsetof(struct s390_idle_data, idle_enter));
+ DEFINE(__IDLE_EXIT, offsetof(struct s390_idle_data, idle_exit));
+ /* vtimer queue offsets */
+ DEFINE(__VQ_IDLE_ENTER, offsetof(struct vtimer_queue, idle_enter));
+ DEFINE(__VQ_IDLE_EXIT, offsetof(struct vtimer_queue, idle_exit));
/* lowcore offsets */
DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
- DEFINE(__LC_CPU_ADDRESS, offsetof(struct _lowcore, cpu_addr));
+ DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
@@ -95,20 +97,19 @@ int main(void)
DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
- DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
- BLANK();
- DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
+ DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+ BLANK();
DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
@@ -129,12 +130,16 @@ int main(void)
DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
+ DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
+ DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
DEFINE(__LC_IRB, offsetof(struct _lowcore, irb));
+ DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
+ BLANK();
DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 6fe78c2f95d..53a82c8d50e 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -581,7 +581,6 @@ give_sigsegv:
int handle_signal32(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
{
- sigset_t blocked;
int ret;
/* Set up the stack frame */
@@ -591,10 +590,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
ret = setup_frame32(sig, ka, oldset, regs);
if (ret)
return ret;
- sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&blocked, sig);
- set_current_blocked(&blocked);
+ block_sigmask(ka, sig);
return 0;
}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index c383ce440d9..cc1172b2687 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -14,6 +14,7 @@
#include <linux/bootmem.h>
#include <linux/elf.h>
#include <asm/ipl.h>
+#include <asm/os_info.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -51,7 +52,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
/*
* Copy memory from old kernel
*/
-static int copy_from_oldmem(void *dest, void *src, size_t count)
+int copy_from_oldmem(void *dest, void *src, size_t count)
{
unsigned long copied = 0;
int rc;
@@ -224,28 +225,44 @@ static void *nt_prpsinfo(void *ptr)
}
/*
- * Initialize vmcoreinfo note (new kernel)
+ * Get vmcoreinfo using lowcore->vmcore_info (new kernel)
*/
-static void *nt_vmcoreinfo(void *ptr)
+static void *get_vmcoreinfo_old(unsigned long *size)
{
char nt_name[11], *vmcoreinfo;
Elf64_Nhdr note;
void *addr;
if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
- return ptr;
+ return NULL;
memset(nt_name, 0, sizeof(nt_name));
if (copy_from_oldmem(&note, addr, sizeof(note)))
- return ptr;
+ return NULL;
if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1))
- return ptr;
+ return NULL;
if (strcmp(nt_name, "VMCOREINFO") != 0)
- return ptr;
- vmcoreinfo = kzalloc_panic(note.n_descsz + 1);
+ return NULL;
+ vmcoreinfo = kzalloc_panic(note.n_descsz);
if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz))
+ return NULL;
+ *size = note.n_descsz;
+ return vmcoreinfo;
+}
+
+/*
+ * Initialize vmcoreinfo note (new kernel)
+ */
+static void *nt_vmcoreinfo(void *ptr)
+{
+ unsigned long size;
+ void *vmcoreinfo;
+
+ vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+ if (!vmcoreinfo)
+ vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (!vmcoreinfo)
return ptr;
- vmcoreinfo[note.n_descsz + 1] = 0;
- return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO");
+ return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO");
}
/*
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 6848828b962..19e5e9eba54 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -2,8 +2,8 @@
* arch/s390/kernel/debug.c
* S/390 debug facility
*
- * Copyright (C) 1999, 2000 IBM Deutschland Entwicklung GmbH,
- * IBM Corporation
+ * Copyright IBM Corp. 1999, 2012
+ *
* Author(s): Michael Holzheu (holzheu@de.ibm.com),
* Holger Smolinski (Holger.Smolinski@de.ibm.com)
*
@@ -167,6 +167,7 @@ static debug_info_t *debug_area_last = NULL;
static DEFINE_MUTEX(debug_mutex);
static int initialized;
+static int debug_critical;
static const struct file_operations debug_file_ops = {
.owner = THIS_MODULE,
@@ -932,6 +933,11 @@ debug_stop_all(void)
}
+void debug_set_critical(void)
+{
+ debug_critical = 1;
+}
+
/*
* debug_event_common:
* - write debug entry with given size
@@ -945,7 +951,11 @@ debug_event_common(debug_info_t * id, int level, const void *buf, int len)
if (!debug_active || !id->areas)
return NULL;
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
memset(DEBUG_DATA(active), 0, id->buf_size);
memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
@@ -968,7 +978,11 @@ debug_entry_t
if (!debug_active || !id->areas)
return NULL;
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
memset(DEBUG_DATA(active), 0, id->buf_size);
memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
@@ -1013,7 +1027,11 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...)
return NULL;
numargs=debug_count_numargs(string);
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active);
va_start(ap,string);
@@ -1047,7 +1065,11 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...)
numargs=debug_count_numargs(string);
- spin_lock_irqsave(&id->lock, flags);
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
active = get_active_entry(id);
curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active);
va_start(ap,string);
@@ -1428,10 +1450,10 @@ debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,
rc += sprintf(out_buf + rc, "| ");
for (i = 0; i < id->buf_size; i++) {
unsigned char c = in_buf[i];
- if (!isprint(c))
- rc += sprintf(out_buf + rc, ".");
- else
+ if (isascii(c) && isprint(c))
rc += sprintf(out_buf + rc, "%c", c);
+ else
+ rc += sprintf(out_buf + rc, ".");
}
rc += sprintf(out_buf + rc, "\n");
return rc;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 52098d6dfaa..578eb4e6d15 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -29,6 +29,7 @@
#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/sclp.h>
+#include <asm/system.h>
#include "entry.h"
/*
@@ -262,25 +263,8 @@ static noinline __init void setup_lowcore_early(void)
static noinline __init void setup_facility_list(void)
{
- unsigned long nr;
-
- S390_lowcore.stfl_fac_list = 0;
- asm volatile(
- " .insn s,0xb2b10000,0(0)\n" /* stfl */
- "0:\n"
- EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list));
- memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
- nr = 4; /* # bytes stored by stfl */
- if (test_facility(7)) {
- /* More facility bits available with stfle */
- register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1;
- asm volatile(".insn s,0xb2b00000,%0" /* stfle */
- : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0)
- : : "cc");
- nr = (reg0 + 1) * 8; /* # bytes stored by stfle */
- }
- memset((char *) S390_lowcore.stfle_fac_list + nr, 0,
- MAX_FACILITY_BIT/8 - nr);
+ stfle(S390_lowcore.stfle_fac_list,
+ ARRAY_SIZE(S390_lowcore.stfle_fac_list));
}
static noinline __init void setup_hpage(void)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3705700ed37..74ee563fe62 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -2,7 +2,7 @@
* arch/s390/kernel/entry.S
* S390 low-level entry points.
*
- * Copyright (C) IBM Corp. 1999,2006
+ * Copyright (C) IBM Corp. 1999,2012
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Hartmut Penner (hp@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
@@ -105,14 +105,14 @@ STACK_SIZE = 1 << STACK_SHIFT
.macro ADD64 high,low,timer
al \high,\timer
- al \low,\timer+4
+ al \low,4+\timer
brc 12,.+8
ahi \high,1
.endm
.macro SUB64 high,low,timer
sl \high,\timer
- sl \low,\timer+4
+ sl \low,4+\timer
brc 3,.+8
ahi \high,-1
.endm
@@ -471,7 +471,6 @@ io_tif:
jnz io_work # there is work to do (signals etc.)
io_restore:
mvc __LC_RETURN_PSW(8),__PT_PSW(%r11)
- ni __LC_RETURN_PSW+1,0xfd # clean wait state bit
stpt __LC_EXIT_TIMER
lm %r0,%r15,__PT_R0(%r11)
lpsw __LC_RETURN_PSW
@@ -606,12 +605,32 @@ ext_skip:
stm %r8,%r9,__PT_PSW(%r11)
TRACE_IRQS_OFF
lr %r2,%r11 # pass pointer to pt_regs
- l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code
+ l %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code
l %r4,__LC_EXT_PARAMS # get external parameters
l %r1,BASED(.Ldo_extint)
basr %r14,%r1 # call do_extint
j io_return
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+ st %r4,__SF_EMPTY(%r15)
+ basr %r1,0
+ la %r1,psw_idle_lpsw+4-.(%r1)
+ st %r1,__SF_EMPTY+4(%r15)
+ oi __SF_EMPTY+4(%r15),0x80
+ la %r1,.Lvtimer_max-psw_idle_lpsw-4(%r1)
+ stck __IDLE_ENTER(%r2)
+ ltr %r5,%r5
+ stpt __VQ_IDLE_ENTER(%r3)
+ jz psw_idle_lpsw
+ spt 0(%r1)
+psw_idle_lpsw:
+ lpsw __SF_EMPTY(%r15)
+ br %r14
+psw_idle_end:
+
__critical_end:
/*
@@ -673,7 +692,6 @@ mcck_skip:
TRACE_IRQS_ON
mcck_return:
mvc __LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
- ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
jno 0f
lm %r0,%r15,__PT_R0(%r11)
@@ -691,77 +709,30 @@ mcck_panic:
0: ahi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j mcck_skip
-/*
- * Restart interruption handler, kick starter for additional CPUs
- */
-#ifdef CONFIG_SMP
- __CPUINIT
-ENTRY(restart_int_handler)
- basr %r1,0
-restart_base:
- spt restart_vtime-restart_base(%r1)
- stck __LC_LAST_UPDATE_CLOCK
- mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1)
- mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1)
- l %r15,__LC_GPREGS_SAVE_AREA+60 # load ksp
- lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs
- lam %a0,%a15,__LC_AREGS_SAVE_AREA
- lm %r6,%r15,__SF_GPRS(%r15)# load registers from clone
- l %r1,__LC_THREAD_INFO
- mvc __LC_USER_TIMER(8),__TI_user_timer(%r1)
- mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1)
- xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER
- ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- basr %r14,0
- l %r14,restart_addr-.(%r14)
- basr %r14,%r14 # call start_secondary
-restart_addr:
- .long start_secondary
- .align 8
-restart_vtime:
- .long 0x7fffffff,0xffffffff
- .previous
-#else
-/*
- * If we do not run with SMP enabled, let the new CPU crash ...
- */
-ENTRY(restart_int_handler)
- basr %r1,0
-restart_base:
- lpsw restart_crash-restart_base(%r1)
- .align 8
-restart_crash:
- .long 0x000a0000,0x00000000
-restart_go:
-#endif
-
#
# PSW restart interrupt handler
#
-ENTRY(psw_restart_int_handler)
+ENTRY(restart_int_handler)
st %r15,__LC_SAVE_AREA_RESTART
- basr %r15,0
-0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack
- l %r15,0(%r15)
+ l %r15,__LC_RESTART_STACK
ahi %r15,-__PT_SIZE # create pt_regs on stack
+ xc 0(__PT_SIZE,%r15),0(%r15)
stm %r0,%r14,__PT_R0(%r15)
mvc __PT_R15(4,%r15),__LC_SAVE_AREA_RESTART
mvc __PT_PSW(8,%r15),__LC_RST_OLD_PSW # store restart old psw
- ahi %r15,-STACK_FRAME_OVERHEAD
- xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
- basr %r14,0
-1: l %r14,.Ldo_restart-1b(%r14)
- basr %r14,%r14
- basr %r14,0 # load disabled wait PSW if
-2: lpsw restart_psw_crash-2b(%r14) # do_restart returns
- .align 4
-.Ldo_restart:
- .long do_restart
-.Lrestart_stack:
- .long restart_stack
- .align 8
-restart_psw_crash:
- .long 0x000a0000,0x00000000 + restart_psw_crash
+ ahi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
+ lm %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu
+ ltr %r3,%r3 # test source cpu address
+ jm 1f # negative -> skip source stop
+0: sigp %r4,%r3,1 # sigp sense to source cpu
+ brc 10,0b # wait for status stored
+1: basr %r14,%r1 # call function
+ stap __SF_EMPTY(%r15) # store cpu address
+ lh %r3,__SF_EMPTY(%r15)
+2: sigp %r4,%r3,5 # sigp stop to current cpu
+ brc 2,2b
+3: j 3b
.section .kprobes.text, "ax"
@@ -795,6 +766,8 @@ cleanup_table:
.long io_tif + 0x80000000
.long io_restore + 0x80000000
.long io_done + 0x80000000
+ .long psw_idle + 0x80000000
+ .long psw_idle_end + 0x80000000
cleanup_critical:
cl %r9,BASED(cleanup_table) # system_call
@@ -813,6 +786,10 @@ cleanup_critical:
jl cleanup_io_tif
cl %r9,BASED(cleanup_table+28) # io_done
jl cleanup_io_restore
+ cl %r9,BASED(cleanup_table+32) # psw_idle
+ jl 0f
+ cl %r9,BASED(cleanup_table+36) # psw_idle_end
+ jl cleanup_idle
0: br %r14
cleanup_system_call:
@@ -896,7 +873,6 @@ cleanup_io_restore:
jhe 0f
l %r9,12(%r11) # get saved r11 pointer to pt_regs
mvc __LC_RETURN_PSW(8),__PT_PSW(%r9)
- ni __LC_RETURN_PSW+1,0xfd # clear wait state bit
mvc 0(32,%r11),__PT_R8(%r9)
lm %r0,%r7,__PT_R0(%r9)
0: lm %r8,%r9,__LC_RETURN_PSW
@@ -904,11 +880,52 @@ cleanup_io_restore:
cleanup_io_restore_insn:
.long io_done - 4 + 0x80000000
+cleanup_idle:
+ # copy interrupt clock & cpu timer
+ mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+ mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER
+ chi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+ mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER
+0: # check if stck has been executed
+ cl %r9,BASED(cleanup_idle_insn)
+ jhe 1f
+ mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2)
+ mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3)
+ j 2f
+1: # check if the cpu timer has been reprogrammed
+ ltr %r5,%r5
+ jz 2f
+ spt __VQ_IDLE_ENTER(%r3)
+2: # account system time going idle
+ lm %r9,%r10,__LC_STEAL_TIMER
+ ADD64 %r9,%r10,__IDLE_ENTER(%r2)
+ SUB64 %r9,%r10,__LC_LAST_UPDATE_CLOCK
+ stm %r9,%r10,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2)
+ lm %r9,%r10,__LC_SYSTEM_TIMER
+ ADD64 %r9,%r10,__LC_LAST_UPDATE_TIMER
+ SUB64 %r9,%r10,__VQ_IDLE_ENTER(%r3)
+ stm %r9,%r10,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3)
+ # prepare return psw
+ n %r8,BASED(cleanup_idle_wait) # clear wait state bit
+ l %r9,24(%r11) # return from psw_idle
+ br %r14
+cleanup_idle_insn:
+ .long psw_idle_lpsw + 0x80000000
+cleanup_idle_wait:
+ .long 0xfffdffff
+
/*
* Integer constants
*/
.align 4
-.Lnr_syscalls: .long NR_syscalls
+.Lnr_syscalls:
+ .long NR_syscalls
+.Lvtimer_max:
+ .quad 0x7fffffffffffffff
/*
* Symbol constants
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index bf538aaf407..6cdddac93a2 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -4,11 +4,22 @@
#include <linux/types.h>
#include <linux/signal.h>
#include <asm/ptrace.h>
-
+#include <asm/cputime.h>
+#include <asm/timer.h>
extern void (*pgm_check_table[128])(struct pt_regs *);
extern void *restart_stack;
+void system_call(void);
+void pgm_check_handler(void);
+void ext_int_handler(void);
+void io_int_handler(void);
+void mcck_int_handler(void);
+void restart_int_handler(void);
+void restart_call_handler(void);
+void psw_idle(struct s390_idle_data *, struct vtimer_queue *,
+ unsigned long, int);
+
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
@@ -24,9 +35,9 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
void do_notify_resume(struct pt_regs *regs);
-void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long);
+struct ext_code;
+void do_extint(struct pt_regs *regs, struct ext_code, unsigned int, unsigned long);
void do_restart(void);
-int __cpuinit start_secondary(void *cpuvoid);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 412a7b8783d..4e1c292fa7e 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -2,7 +2,7 @@
* arch/s390/kernel/entry64.S
* S390 low-level entry points.
*
- * Copyright (C) IBM Corp. 1999,2010
+ * Copyright (C) IBM Corp. 1999,2012
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Hartmut Penner (hp@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
@@ -489,7 +489,6 @@ io_restore:
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
- ni __LC_RETURN_PSW+1,0xfd # clear wait state bit
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
lmg %r11,%r15,__PT_R11(%r11)
@@ -625,12 +624,30 @@ ext_skip:
TRACE_IRQS_OFF
lghi %r1,4096
lgr %r2,%r11 # pass pointer to pt_regs
- llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code
+ llgf %r3,__LC_EXT_CPU_ADDR # get cpu address + interruption code
llgf %r4,__LC_EXT_PARAMS # get external parameter
lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter
brasl %r14,do_extint
j io_return
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+ stg %r4,__SF_EMPTY(%r15)
+ larl %r1,psw_idle_lpsw+4
+ stg %r1,__SF_EMPTY+8(%r15)
+ larl %r1,.Lvtimer_max
+ stck __IDLE_ENTER(%r2)
+ ltr %r5,%r5
+ stpt __VQ_IDLE_ENTER(%r3)
+ jz psw_idle_lpsw
+ spt 0(%r1)
+psw_idle_lpsw:
+ lpswe __SF_EMPTY(%r15)
+ br %r14
+psw_idle_end:
+
__critical_end:
/*
@@ -696,7 +713,6 @@ mcck_return:
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
- ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
jno 0f
stpt __LC_EXIT_TIMER
@@ -713,68 +729,30 @@ mcck_panic:
0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j mcck_skip
-/*
- * Restart interruption handler, kick starter for additional CPUs
- */
-#ifdef CONFIG_SMP
- __CPUINIT
-ENTRY(restart_int_handler)
- basr %r1,0
-restart_base:
- spt restart_vtime-restart_base(%r1)
- stck __LC_LAST_UPDATE_CLOCK
- mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1)
- mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1)
- lghi %r10,__LC_GPREGS_SAVE_AREA
- lg %r15,120(%r10) # load ksp
- lghi %r10,__LC_CREGS_SAVE_AREA
- lctlg %c0,%c15,0(%r10) # get new ctl regs
- lghi %r10,__LC_AREGS_SAVE_AREA
- lam %a0,%a15,0(%r10)
- lmg %r6,%r15,__SF_GPRS(%r15)# load registers from clone
- lg %r1,__LC_THREAD_INFO
- mvc __LC_USER_TIMER(8),__TI_user_timer(%r1)
- mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1)
- xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER
- ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- brasl %r14,start_secondary
- .align 8
-restart_vtime:
- .long 0x7fffffff,0xffffffff
- .previous
-#else
-/*
- * If we do not run with SMP enabled, let the new CPU crash ...
- */
-ENTRY(restart_int_handler)
- basr %r1,0
-restart_base:
- lpswe restart_crash-restart_base(%r1)
- .align 8
-restart_crash:
- .long 0x000a0000,0x00000000,0x00000000,0x00000000
-restart_go:
-#endif
-
#
# PSW restart interrupt handler
#
-ENTRY(psw_restart_int_handler)
+ENTRY(restart_int_handler)
stg %r15,__LC_SAVE_AREA_RESTART
- larl %r15,restart_stack # load restart stack
- lg %r15,0(%r15)
+ lg %r15,__LC_RESTART_STACK
aghi %r15,-__PT_SIZE # create pt_regs on stack
+ xc 0(__PT_SIZE,%r15),0(%r15)
stmg %r0,%r14,__PT_R0(%r15)
mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
- aghi %r15,-STACK_FRAME_OVERHEAD
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- brasl %r14,do_restart
- larl %r14,restart_psw_crash # load disabled wait PSW if
- lpswe 0(%r14) # do_restart returns
- .align 8
-restart_psw_crash:
- .quad 0x0002000080000000,0x0000000000000000 + restart_psw_crash
+ aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
+ lmg %r1,%r3,__LC_RESTART_FN # load fn, parm & source cpu
+ ltgr %r3,%r3 # test source cpu address
+ jm 1f # negative -> skip source stop
+0: sigp %r4,%r3,1 # sigp sense to source cpu
+ brc 10,0b # wait for status stored
+1: basr %r14,%r1 # call function
+ stap __SF_EMPTY(%r15) # store cpu address
+ llgh %r3,__SF_EMPTY(%r15)
+2: sigp %r4,%r3,5 # sigp stop to current cpu
+ brc 2,2b
+3: j 3b
.section .kprobes.text, "ax"
@@ -808,6 +786,8 @@ cleanup_table:
.quad io_tif
.quad io_restore
.quad io_done
+ .quad psw_idle
+ .quad psw_idle_end
cleanup_critical:
clg %r9,BASED(cleanup_table) # system_call
@@ -826,6 +806,10 @@ cleanup_critical:
jl cleanup_io_tif
clg %r9,BASED(cleanup_table+56) # io_done
jl cleanup_io_restore
+ clg %r9,BASED(cleanup_table+64) # psw_idle
+ jl 0f
+ clg %r9,BASED(cleanup_table+72) # psw_idle_end
+ jl cleanup_idle
0: br %r14
@@ -915,7 +899,6 @@ cleanup_io_restore:
je 0f
lg %r9,24(%r11) # get saved r11 pointer to pt_regs
mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
- ni __LC_RETURN_PSW+1,0xfd # clear wait state bit
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
0: lmg %r8,%r9,__LC_RETURN_PSW
@@ -923,6 +906,42 @@ cleanup_io_restore:
cleanup_io_restore_insn:
.quad io_done - 4
+cleanup_idle:
+ # copy interrupt clock & cpu timer
+ mvc __IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+ mvc __VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+ mvc __VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER
+0: # check if stck & stpt have been executed
+ clg %r9,BASED(cleanup_idle_insn)
+ jhe 1f
+ mvc __IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2)
+ mvc __VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3)
+ j 2f
+1: # check if the cpu timer has been reprogrammed
+ ltr %r5,%r5
+ jz 2f
+ spt __VQ_IDLE_ENTER(%r3)
+2: # account system time going idle
+ lg %r9,__LC_STEAL_TIMER
+ alg %r9,__IDLE_ENTER(%r2)
+ slg %r9,__LC_LAST_UPDATE_CLOCK
+ stg %r9,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2)
+ lg %r9,__LC_SYSTEM_TIMER
+ alg %r9,__LC_LAST_UPDATE_TIMER
+ slg %r9,__VQ_IDLE_ENTER(%r3)
+ stg %r9,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3)
+ # prepare return psw
+ nihh %r8,0xfffd # clear wait state bit
+ lg %r9,48(%r11) # return from psw_idle
+ br %r14
+cleanup_idle_insn:
+ .quad psw_idle_lpsw
+
/*
* Integer constants
*/
@@ -931,6 +950,8 @@ cleanup_io_restore_insn:
.quad __critical_start
.Lcritical_length:
.quad __critical_end - __critical_start
+.Lvtimer_max:
+ .quad 0x7fffffffffffffff
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index affa8e68124..8342e65a140 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2,7 +2,7 @@
* arch/s390/kernel/ipl.c
* ipl/reipl/dump support for Linux on s390.
*
- * Copyright IBM Corp. 2005,2007
+ * Copyright IBM Corp. 2005,2012
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
* Heiko Carstens <heiko.carstens@de.ibm.com>
* Volker Sameske <sameske@de.ibm.com>
@@ -17,6 +17,7 @@
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/crash_dump.h>
+#include <linux/debug_locks.h>
#include <asm/ipl.h>
#include <asm/smp.h>
#include <asm/setup.h>
@@ -25,8 +26,9 @@
#include <asm/ebcdic.h>
#include <asm/reset.h>
#include <asm/sclp.h>
-#include <asm/sigp.h>
#include <asm/checksum.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
#include "entry.h"
#define IPL_PARM_BLOCK_VERSION 0
@@ -571,7 +573,7 @@ static void __ipl_run(void *unused)
static void ipl_run(struct shutdown_trigger *trigger)
{
- smp_switch_to_ipl_cpu(__ipl_run, NULL);
+ smp_call_ipl_cpu(__ipl_run, NULL);
}
static int __init ipl_init(void)
@@ -950,6 +952,13 @@ static struct attribute_group reipl_nss_attr_group = {
.attrs = reipl_nss_attrs,
};
+static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block)
+{
+ reipl_block_actual = reipl_block;
+ os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block->hdr.len);
+}
+
/* reipl type */
static int reipl_set_type(enum ipl_type type)
@@ -965,7 +974,7 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_CCW_VM;
else
reipl_method = REIPL_METHOD_CCW_CIO;
- reipl_block_actual = reipl_block_ccw;
+ set_reipl_block_actual(reipl_block_ccw);
break;
case IPL_TYPE_FCP:
if (diag308_set_works)
@@ -974,7 +983,7 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_FCP_RO_VM;
else
reipl_method = REIPL_METHOD_FCP_RO_DIAG;
- reipl_block_actual = reipl_block_fcp;
+ set_reipl_block_actual(reipl_block_fcp);
break;
case IPL_TYPE_FCP_DUMP:
reipl_method = REIPL_METHOD_FCP_DUMP;
@@ -984,7 +993,7 @@ static int reipl_set_type(enum ipl_type type)
reipl_method = REIPL_METHOD_NSS_DIAG;
else
reipl_method = REIPL_METHOD_NSS;
- reipl_block_actual = reipl_block_nss;
+ set_reipl_block_actual(reipl_block_nss);
break;
case IPL_TYPE_UNKNOWN:
reipl_method = REIPL_METHOD_DEFAULT;
@@ -1101,7 +1110,7 @@ static void __reipl_run(void *unused)
static void reipl_run(struct shutdown_trigger *trigger)
{
- smp_switch_to_ipl_cpu(__reipl_run, NULL);
+ smp_call_ipl_cpu(__reipl_run, NULL);
}
static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
@@ -1256,6 +1265,29 @@ static int __init reipl_fcp_init(void)
return 0;
}
+static int __init reipl_type_init(void)
+{
+ enum ipl_type reipl_type = ipl_info.type;
+ struct ipl_parameter_block *reipl_block;
+ unsigned long size;
+
+ reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size);
+ if (!reipl_block)
+ goto out;
+ /*
+ * If we have an OS info reipl block, this will be used
+ */
+ if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+ memcpy(reipl_block_fcp, reipl_block, size);
+ reipl_type = IPL_TYPE_FCP;
+ } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+ memcpy(reipl_block_ccw, reipl_block, size);
+ reipl_type = IPL_TYPE_CCW;
+ }
+out:
+ return reipl_set_type(reipl_type);
+}
+
static int __init reipl_init(void)
{
int rc;
@@ -1277,10 +1309,7 @@ static int __init reipl_init(void)
rc = reipl_nss_init();
if (rc)
return rc;
- rc = reipl_set_type(ipl_info.type);
- if (rc)
- return rc;
- return 0;
+ return reipl_type_init();
}
static struct shutdown_action __refdata reipl_action = {
@@ -1421,7 +1450,7 @@ static void dump_run(struct shutdown_trigger *trigger)
if (dump_method == DUMP_METHOD_NONE)
return;
smp_send_stop();
- smp_switch_to_ipl_cpu(__dump_run, NULL);
+ smp_call_ipl_cpu(__dump_run, NULL);
}
static int __init dump_ccw_init(void)
@@ -1499,30 +1528,12 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
- preempt_disable();
- /*
- * Bypass dynamic address translation (DAT) when storing IPL parameter
- * information block address and checksum into the prefix area
- * (corresponding to absolute addresses 0-8191).
- * When enhanced DAT applies and the STE format control in one,
- * the absolute address is formed without prefixing. In this case a
- * normal store (stg/st) into the prefix area would no more match to
- * absolute addresses 0-8191.
- */
-#ifdef CONFIG_64BIT
- asm volatile("sturg %0,%1"
- :: "a" ((unsigned long) reipl_block_actual),
- "a" (&lowcore_ptr[smp_processor_id()]->ipib));
-#else
- asm volatile("stura %0,%1"
- :: "a" ((unsigned long) reipl_block_actual),
- "a" (&lowcore_ptr[smp_processor_id()]->ipib));
-#endif
- asm volatile("stura %0,%1"
- :: "a" (csum_partial(reipl_block_actual,
- reipl_block_actual->hdr.len, 0)),
- "a" (&lowcore_ptr[smp_processor_id()]->ipib_checksum));
- preempt_enable();
+ u32 csum;
+
+ csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+ copy_to_absolute_zero(&S390_lowcore.ipib_checksum, &csum, sizeof(csum));
+ copy_to_absolute_zero(&S390_lowcore.ipib, &reipl_block_actual,
+ sizeof(reipl_block_actual));
dump_run(trigger);
}
@@ -1623,9 +1634,7 @@ static void stop_run(struct shutdown_trigger *trigger)
if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
strcmp(trigger->name, ON_RESTART_STR) == 0)
disabled_wait((unsigned long) __builtin_return_address(0));
- while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
- cpu_relax();
- for (;;);
+ smp_stop_cpu();
}
static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
@@ -1713,6 +1722,7 @@ static struct kobj_attribute on_panic_attr =
static void do_panic(void)
{
+ lgr_info_log();
on_panic_trigger.action->fn(&on_panic_trigger);
stop_run(&on_panic_trigger);
}
@@ -1738,9 +1748,8 @@ static ssize_t on_restart_store(struct kobject *kobj,
static struct kobj_attribute on_restart_attr =
__ATTR(on_restart, 0644, on_restart_show, on_restart_store);
-void do_restart(void)
+static void __do_restart(void *ignore)
{
- smp_restart_with_online_cpu();
smp_send_stop();
#ifdef CONFIG_CRASH_DUMP
crash_kexec(NULL);
@@ -1749,6 +1758,14 @@ void do_restart(void)
stop_run(&on_restart_trigger);
}
+void do_restart(void)
+{
+ tracing_off();
+ debug_locks_off();
+ lgr_info_log();
+ smp_call_online_cpu(__do_restart, NULL);
+}
+
/* on halt */
static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index b9a7fdd9c81..1c2cdd59ccd 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -165,13 +165,6 @@ static inline int ext_hash(u16 code)
return (code + (code >> 9)) & 0xff;
}
-static void ext_int_hash_update(struct rcu_head *head)
-{
- struct ext_int_info *p = container_of(head, struct ext_int_info, rcu);
-
- kfree(p);
-}
-
int register_external_interrupt(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
@@ -202,38 +195,34 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
if (p->code == code && p->handler == handler) {
list_del_rcu(&p->entry);
- call_rcu(&p->rcu, ext_int_hash_update);
+ kfree_rcu(p, rcu);
}
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
}
EXPORT_SYMBOL(unregister_external_interrupt);
-void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code,
+void __irq_entry do_extint(struct pt_regs *regs, struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
struct pt_regs *old_regs;
- unsigned short code;
struct ext_int_info *p;
int index;
- code = (unsigned short) ext_int_code;
old_regs = set_irq_regs(regs);
- s390_idle_check(regs, S390_lowcore.int_clock,
- S390_lowcore.async_enter_timer);
irq_enter();
if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
/* Serve timer interrupts first. */
clock_comparator_work();
kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
- if (code != 0x1004)
+ if (ext_code.code != 0x1004)
__get_cpu_var(s390_idle).nohz_delay = 1;
- index = ext_hash(code);
+ index = ext_hash(ext_code.code);
rcu_read_lock();
list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
- if (likely(p->code == code))
- p->handler(ext_int_code, param32, param64);
+ if (likely(p->code == ext_code.code))
+ p->handler(ext_code, param32, param64);
rcu_read_unlock();
irq_exit();
set_irq_regs(old_regs);
@@ -266,3 +255,26 @@ void service_subclass_irq_unregister(void)
spin_unlock(&sc_irq_lock);
}
EXPORT_SYMBOL(service_subclass_irq_unregister);
+
+static DEFINE_SPINLOCK(ma_subclass_lock);
+static int ma_subclass_refcount;
+
+void measurement_alert_subclass_register(void)
+{
+ spin_lock(&ma_subclass_lock);
+ if (!ma_subclass_refcount)
+ ctl_set_bit(0, 5);
+ ma_subclass_refcount++;
+ spin_unlock(&ma_subclass_lock);
+}
+EXPORT_SYMBOL(measurement_alert_subclass_register);
+
+void measurement_alert_subclass_unregister(void)
+{
+ spin_lock(&ma_subclass_lock);
+ ma_subclass_refcount--;
+ if (!ma_subclass_refcount)
+ ctl_clear_bit(0, 5);
+ spin_unlock(&ma_subclass_lock);
+}
+EXPORT_SYMBOL(measurement_alert_subclass_unregister);
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
new file mode 100644
index 00000000000..8431b92ca3a
--- /dev/null
+++ b/arch/s390/kernel/lgr.c
@@ -0,0 +1,200 @@
+/*
+ * Linux Guest Relocation (LGR) detection
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/system.h>
+#include <asm/debug.h>
+#include <asm/ipl.h>
+
+#define LGR_TIMER_INTERVAL_SECS (30 * 60)
+#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */
+
+/*
+ * LGR info: Contains stfle and stsi data
+ */
+struct lgr_info {
+ /* Bit field with facility information: 4 DWORDs are stored */
+ u64 stfle_fac_list[4];
+ /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */
+ u32 level;
+ /* Level 1: CEC info (stsi 1.1.1) */
+ char manufacturer[16];
+ char type[4];
+ char sequence[16];
+ char plant[4];
+ char model[16];
+ /* Level 2: LPAR info (stsi 2.2.2) */
+ u16 lpar_number;
+ char name[8];
+ /* Level 3: VM info (stsi 3.2.2) */
+ u8 vm_count;
+ struct {
+ char name[8];
+ char cpi[16];
+ } vm[VM_LEVEL_MAX];
+} __packed __aligned(8);
+
+/*
+ * LGR globals
+ */
+static void *lgr_page;
+static struct lgr_info lgr_info_last;
+static struct lgr_info lgr_info_cur;
+static struct debug_info *lgr_dbf;
+
+/*
+ * Return number of valid stsi levels
+ */
+static inline int stsi_0(void)
+{
+ int rc = stsi(NULL, 0, 0, 0);
+
+ return rc == -ENOSYS ? rc : (((unsigned int) rc) >> 28);
+}
+
+/*
+ * Copy buffer and then convert it to ASCII
+ */
+static void cpascii(char *dst, char *src, int size)
+{
+ memcpy(dst, src, size);
+ EBCASC(dst, size);
+}
+
+/*
+ * Fill LGR info with 1.1.1 stsi data
+ */
+static void lgr_stsi_1_1_1(struct lgr_info *lgr_info)
+{
+ struct sysinfo_1_1_1 *si = lgr_page;
+
+ if (stsi(si, 1, 1, 1) == -ENOSYS)
+ return;
+ cpascii(lgr_info->manufacturer, si->manufacturer,
+ sizeof(si->manufacturer));
+ cpascii(lgr_info->type, si->type, sizeof(si->type));
+ cpascii(lgr_info->model, si->model, sizeof(si->model));
+ cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence));
+ cpascii(lgr_info->plant, si->plant, sizeof(si->plant));
+}
+
+/*
+ * Fill LGR info with 2.2.2 stsi data
+ */
+static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_2_2_2 *si = lgr_page;
+
+ if (stsi(si, 2, 2, 2) == -ENOSYS)
+ return;
+ cpascii(lgr_info->name, si->name, sizeof(si->name));
+ memcpy(&lgr_info->lpar_number, &si->lpar_number,
+ sizeof(lgr_info->lpar_number));
+}
+
+/*
+ * Fill LGR info with 3.2.2 stsi data
+ */
+static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_3_2_2 *si = lgr_page;
+ int i;
+
+ if (stsi(si, 3, 2, 2) == -ENOSYS)
+ return;
+ for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) {
+ cpascii(lgr_info->vm[i].name, si->vm[i].name,
+ sizeof(si->vm[i].name));
+ cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi,
+ sizeof(si->vm[i].cpi));
+ }
+ lgr_info->vm_count = si->count;
+}
+
+/*
+ * Fill LGR info with current data
+ */
+static void lgr_info_get(struct lgr_info *lgr_info)
+{
+ memset(lgr_info, 0, sizeof(*lgr_info));
+ stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list));
+ lgr_info->level = stsi_0();
+ if (lgr_info->level == -ENOSYS)
+ return;
+ if (lgr_info->level >= 1)
+ lgr_stsi_1_1_1(lgr_info);
+ if (lgr_info->level >= 2)
+ lgr_stsi_2_2_2(lgr_info);
+ if (lgr_info->level >= 3)
+ lgr_stsi_3_2_2(lgr_info);
+}
+
+/*
+ * Check if LGR info has changed and if yes log new LGR info to s390dbf
+ */
+void lgr_info_log(void)
+{
+ static DEFINE_SPINLOCK(lgr_info_lock);
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&lgr_info_lock, flags))
+ return;
+ lgr_info_get(&lgr_info_cur);
+ if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) {
+ debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur));
+ lgr_info_last = lgr_info_cur;
+ }
+ spin_unlock_irqrestore(&lgr_info_lock, flags);
+}
+EXPORT_SYMBOL_GPL(lgr_info_log);
+
+static void lgr_timer_set(void);
+
+/*
+ * LGR timer callback
+ */
+static void lgr_timer_fn(unsigned long ignored)
+{
+ lgr_info_log();
+ lgr_timer_set();
+}
+
+static struct timer_list lgr_timer =
+ TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0);
+
+/*
+ * Setup next LGR timer
+ */
+static void lgr_timer_set(void)
+{
+ mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+}
+
+/*
+ * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer
+ */
+static int __init lgr_init(void)
+{
+ lgr_page = (void *) __get_free_pages(GFP_KERNEL, 0);
+ if (!lgr_page)
+ return -ENOMEM;
+ lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info));
+ if (!lgr_dbf) {
+ free_page((unsigned long) lgr_page);
+ return -ENOMEM;
+ }
+ debug_register_view(lgr_dbf, &debug_hex_ascii_view);
+ lgr_info_get(&lgr_info_last);
+ debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last));
+ lgr_timer_set();
+ return 0;
+}
+module_init(lgr_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 47b168fb29c..0f8cdf1268d 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -14,6 +14,7 @@
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
+#include <linux/debug_locks.h>
#include <asm/cio.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
@@ -49,50 +50,21 @@ static void add_elf_notes(int cpu)
}
/*
- * Store status of next available physical CPU
- */
-static int store_status_next(int start_cpu, int this_cpu)
-{
- struct save_area *sa = (void *) 4608 + store_prefix();
- int cpu, rc;
-
- for (cpu = start_cpu; cpu < 65536; cpu++) {
- if (cpu == this_cpu)
- continue;
- do {
- rc = raw_sigp(cpu, sigp_stop_and_store_status);
- } while (rc == sigp_busy);
- if (rc != sigp_order_code_accepted)
- continue;
- if (sa->pref_reg)
- return cpu;
- }
- return -1;
-}
-
-/*
* Initialize CPU ELF notes
*/
void setup_regs(void)
{
unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
- int cpu, this_cpu, phys_cpu = 0, first = 1;
+ int cpu, this_cpu;
- this_cpu = stap();
-
- if (!S390_lowcore.prefixreg_save_area)
- first = 0;
+ this_cpu = smp_find_processor_id(stap());
+ add_elf_notes(this_cpu);
for_each_online_cpu(cpu) {
- if (first) {
- add_elf_notes(cpu);
- first = 0;
+ if (cpu == this_cpu)
+ continue;
+ if (smp_store_status(cpu))
continue;
- }
- phys_cpu = store_status_next(phys_cpu, this_cpu);
- if (phys_cpu == -1)
- break;
add_elf_notes(cpu);
- phys_cpu++;
}
/* Copy dump CPU store status info to absolute zero */
memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
@@ -238,10 +210,14 @@ static void __machine_kexec(void *data)
struct kimage *image = data;
pfault_fini();
- if (image->type == KEXEC_TYPE_CRASH)
+ tracing_off();
+ debug_locks_off();
+ if (image->type == KEXEC_TYPE_CRASH) {
+ lgr_info_log();
s390_reset_system(__do_machine_kdump, data);
- else
+ } else {
s390_reset_system(__do_machine_kexec, data);
+ }
disabled_wait((unsigned long) __builtin_return_address(0));
}
@@ -255,5 +231,5 @@ void machine_kexec(struct kimage *image)
return;
tracer_disable();
smp_send_stop();
- smp_switch_to_ipl_cpu(__machine_kexec, image);
+ smp_call_ipl_cpu(__machine_kexec, image);
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0fd2e863e11..8c372ca6135 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -254,8 +254,6 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
int umode;
nmi_enter();
- s390_idle_check(regs, S390_lowcore.mcck_clock,
- S390_lowcore.mcck_enter_timer);
kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++;
mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
mcck = &__get_cpu_var(cpu_mcck);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
new file mode 100644
index 00000000000..bbe522672e0
--- /dev/null
+++ b/arch/s390/kernel/os_info.c
@@ -0,0 +1,169 @@
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "os_info"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/crash_dump.h>
+#include <linux/kernel.h>
+#include <asm/checksum.h>
+#include <asm/lowcore.h>
+#include <asm/system.h>
+#include <asm/os_info.h>
+
+/*
+ * OS info structure has to be page aligned
+ */
+static struct os_info os_info __page_aligned_data;
+
+/*
+ * Compute checksum over OS info structure
+ */
+u32 os_info_csum(struct os_info *os_info)
+{
+ int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
+ return csum_partial(&os_info->version_major, size, 0);
+}
+
+/*
+ * Add crashkernel info to OS info and update checksum
+ */
+void os_info_crashkernel_add(unsigned long base, unsigned long size)
+{
+ os_info.crashkernel_addr = (u64)(unsigned long)base;
+ os_info.crashkernel_size = (u64)(unsigned long)size;
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Add OS info entry and update checksum
+ */
+void os_info_entry_add(int nr, void *ptr, u64 size)
+{
+ os_info.entry[nr].addr = (u64)(unsigned long)ptr;
+ os_info.entry[nr].size = size;
+ os_info.entry[nr].csum = csum_partial(ptr, size, 0);
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Initialize OS info struture and set lowcore pointer
+ */
+void __init os_info_init(void)
+{
+ void *ptr = &os_info;
+
+ os_info.version_major = OS_INFO_VERSION_MAJOR;
+ os_info.version_minor = OS_INFO_VERSION_MINOR;
+ os_info.magic = OS_INFO_MAGIC;
+ os_info.csum = os_info_csum(&os_info);
+ copy_to_absolute_zero(&S390_lowcore.os_info, &ptr, sizeof(ptr));
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+static struct os_info *os_info_old;
+
+/*
+ * Allocate and copy OS info entry from oldmem
+ */
+static void os_info_old_alloc(int nr, int align)
+{
+ unsigned long addr, size = 0;
+ char *buf, *buf_align, *msg;
+ u32 csum;
+
+ addr = os_info_old->entry[nr].addr;
+ if (!addr) {
+ msg = "not available";
+ goto fail;
+ }
+ size = os_info_old->entry[nr].size;
+ buf = kmalloc(size + align - 1, GFP_KERNEL);
+ if (!buf) {
+ msg = "alloc failed";
+ goto fail;
+ }
+ buf_align = PTR_ALIGN(buf, align);
+ if (copy_from_oldmem(buf_align, (void *) addr, size)) {
+ msg = "copy failed";
+ goto fail_free;
+ }
+ csum = csum_partial(buf_align, size, 0);
+ if (csum != os_info_old->entry[nr].csum) {
+ msg = "checksum failed";
+ goto fail_free;
+ }
+ os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align;
+ msg = "copied";
+ goto out;
+fail_free:
+ kfree(buf);
+fail:
+ os_info_old->entry[nr].addr = 0;
+out:
+ pr_info("entry %i: %s (addr=0x%lx size=%lu)\n",
+ nr, msg, addr, size);
+}
+
+/*
+ * Initialize os info and os info entries from oldmem
+ */
+static void os_info_old_init(void)
+{
+ static int os_info_init;
+ unsigned long addr;
+
+ if (os_info_init)
+ return;
+ if (!OLDMEM_BASE)
+ goto fail;
+ if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr)))
+ goto fail;
+ if (addr == 0 || addr % PAGE_SIZE)
+ goto fail;
+ os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
+ if (!os_info_old)
+ goto fail;
+ if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old)))
+ goto fail_free;
+ if (os_info_old->magic != OS_INFO_MAGIC)
+ goto fail_free;
+ if (os_info_old->csum != os_info_csum(os_info_old))
+ goto fail_free;
+ if (os_info_old->version_major > OS_INFO_VERSION_MAJOR)
+ goto fail_free;
+ os_info_old_alloc(OS_INFO_VMCOREINFO, 1);
+ os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1);
+ os_info_old_alloc(OS_INFO_INIT_FN, PAGE_SIZE);
+ pr_info("crashkernel: addr=0x%lx size=%lu\n",
+ (unsigned long) os_info_old->crashkernel_addr,
+ (unsigned long) os_info_old->crashkernel_size);
+ os_info_init = 1;
+ return;
+fail_free:
+ kfree(os_info_old);
+fail:
+ os_info_init = 1;
+ os_info_old = NULL;
+}
+
+/*
+ * Return pointer to os infor entry and its size
+ */
+void *os_info_old_entry(int nr, unsigned long *size)
+{
+ os_info_old_init();
+
+ if (!os_info_old)
+ return NULL;
+ if (!os_info_old->entry[nr].addr)
+ return NULL;
+ *size = (unsigned long) os_info_old->entry[nr].size;
+ return (void *)(unsigned long)os_info_old->entry[nr].addr;
+}
+#endif
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
new file mode 100644
index 00000000000..8481ecf2ad7
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -0,0 +1,690 @@
+/*
+ * Performance event support for s390x - CPU-measurement Counter Facility
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "cpum_cf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+
+/* CPU-measurement counter facility supports these CPU counter sets:
+ * For CPU counter sets:
+ * Basic counter set: 0-31
+ * Problem-state counter set: 32-63
+ * Crypto-activity counter set: 64-127
+ * Extented counter set: 128-159
+ */
+enum cpumf_ctr_set {
+ /* CPU counter sets */
+ CPUMF_CTR_SET_BASIC = 0,
+ CPUMF_CTR_SET_USER = 1,
+ CPUMF_CTR_SET_CRYPTO = 2,
+ CPUMF_CTR_SET_EXT = 3,
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
+static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
+ [CPUMF_CTR_SET_BASIC] = 0x02,
+ [CPUMF_CTR_SET_USER] = 0x04,
+ [CPUMF_CTR_SET_CRYPTO] = 0x08,
+ [CPUMF_CTR_SET_EXT] = 0x01,
+};
+
+static void ctr_set_enable(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+static void ctr_set_disable(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+static void ctr_set_start(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+static void ctr_set_stop(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+/* Local CPUMF event structure */
+struct cpu_hw_events {
+ struct cpumf_ctr_info info;
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state, tx_state;
+ unsigned int flags;
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+ .ctr_set = {
+ [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
+ },
+ .state = 0,
+ .flags = 0,
+};
+
+static int get_counter_set(u64 event)
+{
+ int set = -1;
+
+ if (event < 32)
+ set = CPUMF_CTR_SET_BASIC;
+ else if (event < 64)
+ set = CPUMF_CTR_SET_USER;
+ else if (event < 128)
+ set = CPUMF_CTR_SET_CRYPTO;
+ else if (event < 160)
+ set = CPUMF_CTR_SET_EXT;
+
+ return set;
+}
+
+static int validate_event(const struct hw_perf_event *hwc)
+{
+ switch (hwc->config_base) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ /* check for reserved counters */
+ if ((hwc->config >= 6 && hwc->config <= 31) ||
+ (hwc->config >= 38 && hwc->config <= 63) ||
+ (hwc->config >= 80 && hwc->config <= 127))
+ return -EOPNOTSUPP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int validate_ctr_version(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ int err = 0;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* check required version for counter sets */
+ switch (hwc->config_base) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ if (cpuhw->info.cfvn < 1)
+ err = -EOPNOTSUPP;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ if (cpuhw->info.csvn < 1)
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+static int validate_ctr_auth(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ u64 ctrs_state;
+ int err = 0;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* check authorization for cpu counter sets */
+ ctrs_state = cpumf_state_ctl[hwc->config_base];
+ if (!(ctrs_state & cpuhw->info.auth_ctl))
+ err = -EPERM;
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+/*
+ * Change the CPUMF state to active.
+ * Enable and activate the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ err = lcctl(cpuhw->state);
+ if (err) {
+ pr_err("Enabling the performance measuring unit "
+ "failed with rc=%lx\n", err);
+ return;
+ }
+
+ cpuhw->flags |= PMU_F_ENABLED;
+}
+
+/*
+ * Change the CPUMF state to inactive.
+ * Disable and enable (inactive) the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ int err;
+ u64 inactive;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ err = lcctl(inactive);
+ if (err) {
+ pr_err("Disabling the performance measuring unit "
+ "failed with rc=%lx\n", err);
+ return;
+ }
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++;
+ cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpuhw->info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+}
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+static void setup_pmc_cpu(void *flags)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+ qctri(&cpuhw->info);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ break;
+ }
+
+ /* Disable CPU counter sets */
+ lcctl(0);
+}
+
+/* Initialize the CPU-measurement facility */
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ measurement_alert_subclass_register();
+
+ return 0;
+}
+
+/* Release the CPU-measurement facility */
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ measurement_alert_subclass_unregister();
+}
+
+/* Release the PMU if event is the last perf event */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
+static const int cpumf_generic_events_basic[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 1,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+/* CPUMF <-> perf event mappings for userspace (problem-state set) */
+static const int cpumf_generic_events_user[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 32,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 33,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ int err;
+ u64 ev;
+
+ switch (attr->type) {
+ case PERF_TYPE_RAW:
+ /* Raw events are used to access counters directly,
+ * hence do not permit excludes */
+ if (attr->exclude_kernel || attr->exclude_user ||
+ attr->exclude_hv)
+ return -EOPNOTSUPP;
+ ev = attr->config;
+ break;
+
+ case PERF_TYPE_HARDWARE:
+ ev = attr->config;
+ /* Count user space (problem-state) only */
+ if (!attr->exclude_user && attr->exclude_kernel) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+
+ /* No support for kernel space counters only */
+ } else if (!attr->exclude_kernel && attr->exclude_user) {
+ return -EOPNOTSUPP;
+
+ /* Count user and kernel space */
+ } else {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ if (ev == -1)
+ return -ENOENT;
+
+ if (ev >= PERF_CPUM_CF_MAX_CTR)
+ return -EINVAL;
+
+ /* The CPU measurement counter facility does not have any interrupts
+ * to do sampling. Sampling must be provided by external means,
+ * for example, by timers.
+ */
+ if (hwc->sample_period)
+ return -EINVAL;
+
+ /* Use the hardware perf event structure to store the counter number
+ * in 'config' member and the counter set to which the counter belongs
+ * in the 'config_base'. The counter set (config_base) is then used
+ * to enable/disable the counters.
+ */
+ hwc->config = ev;
+ hwc->config_base = get_counter_set(ev);
+
+ /* Validate the counter that is assigned to this event.
+ * Because the counter facility can use numerous counters at the
+ * same time without constraints, it is not necessary to explicity
+ * validate event groups (event->group_leader != event).
+ */
+ err = validate_event(hwc);
+ if (err)
+ return err;
+
+ /* Initialize for using the CPU-measurement counter facility */
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ /* Finally, validate version and authorization of the counter set */
+ err = validate_ctr_auth(hwc);
+ if (!err)
+ err = validate_ctr_version(hwc);
+
+ return err;
+}
+
+static int cpumf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ case PERF_TYPE_RAW:
+ err = __hw_perf_event_init(event);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (unlikely(err) && event->destroy)
+ event->destroy(event);
+
+ return err;
+}
+
+static int hw_perf_event_reset(struct perf_event *event)
+{
+ u64 prev, new;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err) {
+ if (err != 3)
+ break;
+ /* The counter is not (yet) available. This
+ * might happen if the counter set to which
+ * this counter belongs is in the disabled
+ * state.
+ */
+ new = 0;
+ }
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ return err;
+}
+
+static int hw_perf_event_update(struct perf_event *event)
+{
+ u64 prev, new, delta;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err)
+ goto out;
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ delta = (prev <= new) ? new - prev
+ : (-1ULL - prev) + new + 1; /* overflow */
+ local64_add(delta, &event->count);
+out:
+ return err;
+}
+
+static void cpumf_pmu_read(struct perf_event *event)
+{
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ hw_perf_event_update(event);
+}
+
+static void cpumf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(hwc->config == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* (Re-)enable and activate the counter set */
+ ctr_set_enable(&cpuhw->state, hwc->config_base);
+ ctr_set_start(&cpuhw->state, hwc->config_base);
+
+ /* The counter set to which this counter belongs can be already active.
+ * Because all counters in a set are active, the event->hw.prev_count
+ * needs to be synchronized. At this point, the counter set can be in
+ * the inactive or disabled state.
+ */
+ hw_perf_event_reset(event);
+
+ /* increment refcount for this counter set */
+ atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
+}
+
+static void cpumf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ /* Decrement reference count for this counter set and if this
+ * is the last used counter in the set, clear activation
+ * control and set the counter set state to inactive.
+ */
+ if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
+ ctr_set_stop(&cpuhw->state, hwc->config_base);
+ event->hw.state |= PERF_HES_STOPPED;
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int cpumf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ /* Check authorization for the counter set to which this
+ * counter belongs.
+ * For group events transaction, the authorization check is
+ * done in cpumf_pmu_commit_txn().
+ */
+ if (!(cpuhw->flags & PERF_EVENT_TXN))
+ if (validate_ctr_auth(&event->hw))
+ return -EPERM;
+
+ ctr_set_enable(&cpuhw->state, event->hw.config_base);
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ cpumf_pmu_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void cpumf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ cpumf_pmu_stop(event, PERF_EF_UPDATE);
+
+ /* Check if any counter in the counter set is still used. If not used,
+ * change the counter set to the disabled state. This also clears the
+ * content of all counters in the set.
+ *
+ * When a new perf event has been added but not yet started, this can
+ * clear enable control and resets all counters in a set. Therefore,
+ * cpumf_pmu_start() always has to reenable a counter set.
+ */
+ if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
+ ctr_set_disable(&cpuhw->state, event->hw.config_base);
+
+ perf_event_update_userpage(event);
+}
+
+/*
+ * Start group events scheduling transaction.
+ * Set flags to perform a single test at commit time.
+ */
+static void cpumf_pmu_start_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ perf_pmu_disable(pmu);
+ cpuhw->flags |= PERF_EVENT_TXN;
+ cpuhw->tx_state = cpuhw->state;
+}
+
+/*
+ * Stop and cancel a group events scheduling tranctions.
+ * Assumes cpumf_pmu_del() is called for each successful added
+ * cpumf_pmu_add() during the transaction.
+ */
+static void cpumf_pmu_cancel_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ WARN_ON(cpuhw->tx_state != cpuhw->state);
+
+ cpuhw->flags &= ~PERF_EVENT_TXN;
+ perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit the group events scheduling transaction. On success, the
+ * transaction is closed. On error, the transaction is kept open
+ * until cpumf_pmu_cancel_txn() is called.
+ */
+static int cpumf_pmu_commit_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ u64 state;
+
+ /* check if the updated state can be scheduled */
+ state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ state >>= CPUMF_LCCTL_ENABLE_SHIFT;
+ if ((state & cpuhw->info.auth_ctl) != state)
+ return -EPERM;
+
+ cpuhw->flags &= ~PERF_EVENT_TXN;
+ perf_pmu_enable(pmu);
+ return 0;
+}
+
+/* Performance monitoring unit for s390x */
+static struct pmu cpumf_pmu = {
+ .pmu_enable = cpumf_pmu_enable,
+ .pmu_disable = cpumf_pmu_disable,
+ .event_init = cpumf_pmu_event_init,
+ .add = cpumf_pmu_add,
+ .del = cpumf_pmu_del,
+ .start = cpumf_pmu_start,
+ .stop = cpumf_pmu_stop,
+ .read = cpumf_pmu_read,
+ .start_txn = cpumf_pmu_start_txn,
+ .commit_txn = cpumf_pmu_commit_txn,
+ .cancel_txn = cpumf_pmu_cancel_txn,
+};
+
+static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long) hcpu;
+ int flags;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ flags = PMC_INIT;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ case CPU_DOWN_PREPARE:
+ flags = PMC_RELEASE;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __init cpumf_pmu_init(void)
+{
+ int rc;
+
+ if (!cpum_cf_avail())
+ return -ENODEV;
+
+ /* clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters */
+ ctl_clear_bit(0, 48);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_interrupt(0x1407, cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts "
+ "failed with rc=%i\n", rc);
+ goto out;
+ }
+
+ rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+ if (rc) {
+ pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ unregister_external_interrupt(0x1407, cpumf_measurement_alert);
+ goto out;
+ }
+ perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+ return rc;
+}
+early_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
new file mode 100644
index 00000000000..609f985198c
--- /dev/null
+++ b/arch/s390/kernel/perf_event.c
@@ -0,0 +1,125 @@
+/*
+ * Performance event support for s390x
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "perf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+
+const char *perf_pmu_name(void)
+{
+ if (cpum_cf_avail() || cpum_sf_avail())
+ return "CPU-measurement facilities (CPUMF)";
+ return "pmu";
+}
+EXPORT_SYMBOL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+ int num = 0;
+
+ if (cpum_cf_avail())
+ num += PERF_CPUM_CF_MAX_CTR;
+
+ return num;
+}
+EXPORT_SYMBOL(perf_num_counters);
+
+void perf_event_print_debug(void)
+{
+ struct cpumf_ctr_info cf_info;
+ unsigned long flags;
+ int cpu;
+
+ if (!cpum_cf_avail())
+ return;
+
+ local_irq_save(flags);
+
+ cpu = smp_processor_id();
+ memset(&cf_info, 0, sizeof(cf_info));
+ if (!qctri(&cf_info)) {
+ pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
+ cpu, cf_info.cfvn, cf_info.csvn,
+ cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+ print_hex_dump_bytes("CPUMF Query: ", DUMP_PREFIX_OFFSET,
+ &cf_info, sizeof(cf_info));
+ }
+
+ local_irq_restore(flags);
+}
+
+/* See also arch/s390/kernel/traps.c */
+static unsigned long __store_trace(struct perf_callchain_entry *entry,
+ unsigned long sp,
+ unsigned long low, unsigned long high)
+{
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+
+ while (1) {
+ sp = sp & PSW_ADDR_INSN;
+ if (sp < low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+ /* Follow the backchain. */
+ while (1) {
+ low = sp;
+ sp = sf->back_chain & PSW_ADDR_INSN;
+ if (!sp)
+ break;
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ perf_callchain_store(entry,
+ sf->gprs[8] & PSW_ADDR_INSN);
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ sp = (unsigned long) (sf + 1);
+ if (sp <= low || sp > high - sizeof(*regs))
+ return sp;
+ regs = (struct pt_regs *) sp;
+ perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+ low = sp;
+ sp = regs->gprs[15];
+ }
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ unsigned long head;
+ struct stack_frame *head_sf;
+
+ if (user_mode(regs))
+ return;
+
+ head = regs->gprs[15];
+ head_sf = (struct stack_frame *) head;
+
+ if (!head_sf || !head_sf->back_chain)
+ return;
+
+ head = head_sf->back_chain;
+ head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
+ S390_lowcore.async_stack);
+
+ __store_trace(entry, head, S390_lowcore.thread_info,
+ S390_lowcore.thread_info + THREAD_SIZE);
+}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e795933eb2c..3732e4c09cb 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -77,13 +77,8 @@ static void default_idle(void)
local_irq_enable();
return;
}
- trace_hardirqs_on();
- /* Don't trace preempt off for idle. */
- stop_critical_timings();
- /* Stop virtual timer and halt the cpu. */
+ /* Halt the cpu and keep track of cpu time accounting. */
vtime_stop_cpu();
- /* Reenable preemption tracer. */
- start_critical_timings();
}
void cpu_idle(void)
@@ -97,9 +92,7 @@ void cpu_idle(void)
tick_nohz_idle_exit();
if (test_thread_flag(TIF_MCCK_PENDING))
s390_handle_mcck();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3b2efc81f34..38e751278bf 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -2,7 +2,7 @@
* arch/s390/kernel/setup.c
*
* S390 version
- * Copyright (C) IBM Corp. 1999,2010
+ * Copyright (C) IBM Corp. 1999,2012
* Author(s): Hartmut Penner (hp@de.ibm.com),
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*
@@ -62,6 +62,8 @@
#include <asm/ebcdic.h>
#include <asm/kvm_virtio.h>
#include <asm/diag.h>
+#include <asm/os_info.h>
+#include "entry.h"
long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
PSW_MASK_EA | PSW_MASK_BA;
@@ -351,8 +353,9 @@ static void setup_addressing_mode(void)
}
}
-static void __init
-setup_lowcore(void)
+void *restart_stack __attribute__((__section__(".data")));
+
+static void __init setup_lowcore(void)
{
struct _lowcore *lc;
@@ -363,7 +366,7 @@ setup_lowcore(void)
lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
lc->restart_psw.mask = psw_kernel_bits;
lc->restart_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
+ PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
lc->external_new_psw.mask = psw_kernel_bits |
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->external_new_psw.addr =
@@ -412,6 +415,24 @@ setup_lowcore(void)
lc->last_update_timer = S390_lowcore.last_update_timer;
lc->last_update_clock = S390_lowcore.last_update_clock;
lc->ftrace_func = S390_lowcore.ftrace_func;
+
+ restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
+ restart_stack += ASYNC_SIZE;
+
+ /*
+ * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
+ * restart data to the absolute zero lowcore. This is necesary if
+ * PSW restart is done on an offline CPU that has lowcore zero.
+ */
+ lc->restart_stack = (unsigned long) restart_stack;
+ lc->restart_fn = (unsigned long) do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1UL;
+ memcpy(&S390_lowcore.restart_stack, &lc->restart_stack,
+ 4*sizeof(unsigned long));
+ copy_to_absolute_zero(&S390_lowcore.restart_psw,
+ &lc->restart_psw, sizeof(psw_t));
+
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
}
@@ -572,27 +593,6 @@ static void __init setup_memory_end(void)
}
}
-void *restart_stack __attribute__((__section__(".data")));
-
-/*
- * Setup new PSW and allocate stack for PSW restart interrupt
- */
-static void __init setup_restart_psw(void)
-{
- psw_t psw;
-
- restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
- restart_stack += ASYNC_SIZE;
-
- /*
- * Setup restart PSW for absolute zero lowcore. This is necesary
- * if PSW restart is done on an offline CPU that has lowcore zero
- */
- psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
- psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
- copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw));
-}
-
static void __init setup_vmcoreinfo(void)
{
#ifdef CONFIG_KEXEC
@@ -747,7 +747,7 @@ static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
unsigned long long crash_base, crash_size;
- char *msg;
+ char *msg = NULL;
int rc;
rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
@@ -779,11 +779,11 @@ static void __init reserve_crashkernel(void)
pr_info("Reserving %lluMB of memory at %lluMB "
"for crashkernel (System RAM: %luMB)\n",
crash_size >> 20, crash_base >> 20, memory_end >> 20);
+ os_info_crashkernel_add(crash_base, crash_size);
#endif
}
-static void __init
-setup_memory(void)
+static void __init setup_memory(void)
{
unsigned long bootmap_size;
unsigned long start_pfn, end_pfn;
@@ -1014,8 +1014,7 @@ static void __init setup_hwcaps(void)
* was printed.
*/
-void __init
-setup_arch(char **cmdline_p)
+void __init setup_arch(char **cmdline_p)
{
/*
* print what head.S has found out about the machine
@@ -1060,6 +1059,7 @@ setup_arch(char **cmdline_p)
parse_early_param();
+ os_info_init();
setup_ipl();
setup_memory_end();
setup_addressing_mode();
@@ -1068,7 +1068,6 @@ setup_arch(char **cmdline_p)
setup_memory();
setup_resources();
setup_vmcoreinfo();
- setup_restart_psw();
setup_lowcore();
cpu_init();
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 2d421d90fad..f29f5ef400e 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -384,7 +384,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset,
struct pt_regs *regs)
{
- sigset_t blocked;
int ret;
/* Set up the stack frame */
@@ -394,10 +393,7 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
ret = setup_frame(sig, ka, oldset, regs);
if (ret)
return ret;
- sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&blocked, sig);
- set_current_blocked(&blocked);
+ block_sigmask(ka, sig);
return 0;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2398ce6b15a..a8bf9994b08 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1,23 +1,18 @@
/*
- * arch/s390/kernel/smp.c
+ * SMP related functions
*
- * Copyright IBM Corp. 1999, 2009
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- * Martin Schwidefsky (schwidefsky@de.ibm.com)
- * Heiko Carstens (heiko.carstens@de.ibm.com)
+ * Copyright IBM Corp. 1999,2012
+ * Author(s): Denis Joseph Barrow,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
*
* based on other smp stuff by
* (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
* (c) 1998 Ingo Molnar
*
- * We work with logical cpu numbering everywhere we can. The only
- * functions using the real cpu address (got from STAP) are the sigp
- * functions. For all other functions we use the identity mapping.
- * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is
- * used e.g. to find the idle task belonging to a logical cpu. Every array
- * in the kernel is sorted by the logical cpu number and not by the physical
- * one which is causing all the confusion with __cpu_logical_map and
- * cpu_number_map in other architectures.
+ * The code outside of smp.c uses logical cpu numbers, only smp.c does
+ * the translation of logical to physical cpu ids. All new code that
+ * operates on physical cpu numbers needs to go into smp.c.
*/
#define KMSG_COMPONENT "cpu"
@@ -31,198 +26,433 @@
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
-#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
#include <linux/cpu.h>
-#include <linux/timex.h>
-#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/crash_dump.h>
#include <asm/asm-offsets.h>
#include <asm/ipl.h>
#include <asm/setup.h>
-#include <asm/sigp.h>
-#include <asm/pgalloc.h>
#include <asm/irq.h>
-#include <asm/cpcmd.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/lowcore.h>
#include <asm/sclp.h>
-#include <asm/cputime.h>
#include <asm/vdso.h>
-#include <asm/cpu.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
#include "entry.h"
-/* logical cpu to cpu address */
-unsigned short __cpu_logical_map[NR_CPUS];
+enum {
+ sigp_sense = 1,
+ sigp_external_call = 2,
+ sigp_emergency_signal = 3,
+ sigp_start = 4,
+ sigp_stop = 5,
+ sigp_restart = 6,
+ sigp_stop_and_store_status = 9,
+ sigp_initial_cpu_reset = 11,
+ sigp_cpu_reset = 12,
+ sigp_set_prefix = 13,
+ sigp_store_status_at_address = 14,
+ sigp_store_extended_status_at_address = 15,
+ sigp_set_architecture = 18,
+ sigp_conditional_emergency_signal = 19,
+ sigp_sense_running = 21,
+};
-static struct task_struct *current_set[NR_CPUS];
+enum {
+ sigp_order_code_accepted = 0,
+ sigp_status_stored = 1,
+ sigp_busy = 2,
+ sigp_not_operational = 3,
+};
-static u8 smp_cpu_type;
-static int smp_use_sigp_detection;
+enum {
+ ec_schedule = 0,
+ ec_call_function,
+ ec_call_function_single,
+ ec_stop_cpu,
+};
-enum s390_cpu_state {
+enum {
CPU_STATE_STANDBY,
CPU_STATE_CONFIGURED,
};
+struct pcpu {
+ struct cpu cpu;
+ struct task_struct *idle; /* idle process for the cpu */
+ struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
+ unsigned long async_stack; /* async stack for the cpu */
+ unsigned long panic_stack; /* panic stack for the cpu */
+ unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ int state; /* physical cpu state */
+ u32 status; /* last status received via sigp */
+ u16 address; /* physical cpu address */
+};
+
+static u8 boot_cpu_type;
+static u16 boot_cpu_address;
+static struct pcpu pcpu_devices[NR_CPUS];
+
DEFINE_MUTEX(smp_cpu_state_mutex);
-static int smp_cpu_state[NR_CPUS];
-static DEFINE_PER_CPU(struct cpu, cpu_devices);
+/*
+ * Signal processor helper functions.
+ */
+static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+{
+ register unsigned int reg1 asm ("1") = parm;
+ int cc;
-static void smp_ext_bitcall(int, int);
+ asm volatile(
+ " sigp %1,%2,0(%3)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+ if (status && cc == 1)
+ *status = reg1;
+ return cc;
+}
-static int raw_cpu_stopped(int cpu)
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
{
- u32 status;
+ int cc;
- switch (raw_sigp_ps(&status, 0, cpu, sigp_sense)) {
- case sigp_status_stored:
- /* Check for stopped and check stop state */
- if (status & 0x50)
- return 1;
- break;
- default:
- break;
+ while (1) {
+ cc = __pcpu_sigp(addr, order, parm, status);
+ if (cc != sigp_busy)
+ return cc;
+ cpu_relax();
}
- return 0;
}
-static inline int cpu_stopped(int cpu)
+static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
{
- return raw_cpu_stopped(cpu_logical_map(cpu));
+ int cc, retry;
+
+ for (retry = 0; ; retry++) {
+ cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status);
+ if (cc != sigp_busy)
+ break;
+ if (retry >= 3)
+ udelay(10);
+ }
+ return cc;
+}
+
+static inline int pcpu_stopped(struct pcpu *pcpu)
+{
+ if (__pcpu_sigp(pcpu->address, sigp_sense,
+ 0, &pcpu->status) != sigp_status_stored)
+ return 0;
+ /* Check for stopped and check stop state */
+ return !!(pcpu->status & 0x50);
+}
+
+static inline int pcpu_running(struct pcpu *pcpu)
+{
+ if (__pcpu_sigp(pcpu->address, sigp_sense_running,
+ 0, &pcpu->status) != sigp_status_stored)
+ return 1;
+ /* Check for running status */
+ return !(pcpu->status & 0x400);
}
/*
- * Ensure that PSW restart is done on an online CPU
+ * Find struct pcpu by cpu address.
*/
-void smp_restart_with_online_cpu(void)
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)
{
int cpu;
- for_each_online_cpu(cpu) {
- if (stap() == __cpu_logical_map[cpu]) {
- /* We are online: Enable DAT again and return */
- __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
- return;
- }
+ for_each_cpu(cpu, mask)
+ if (pcpu_devices[cpu].address == address)
+ return pcpu_devices + cpu;
+ return NULL;
+}
+
+static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
+{
+ int order;
+
+ set_bit(ec_bit, &pcpu->ec_mask);
+ order = pcpu_running(pcpu) ?
+ sigp_external_call : sigp_emergency_signal;
+ pcpu_sigp_retry(pcpu, order, 0);
+}
+
+static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+{
+ struct _lowcore *lc;
+
+ if (pcpu != &pcpu_devices[0]) {
+ pcpu->lowcore = (struct _lowcore *)
+ __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+ pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+ pcpu->panic_stack = __get_free_page(GFP_KERNEL);
+ if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
+ goto out;
}
- /* We are not online: Do PSW restart on an online CPU */
- while (sigp(cpu, sigp_restart) == sigp_busy)
- cpu_relax();
- /* And stop ourself */
- while (raw_sigp(stap(), sigp_stop) == sigp_busy)
- cpu_relax();
- for (;;);
+ lc = pcpu->lowcore;
+ memcpy(lc, &S390_lowcore, 512);
+ memset((char *) lc + 512, 0, sizeof(*lc) - 512);
+ lc->async_stack = pcpu->async_stack + ASYNC_SIZE;
+ lc->panic_stack = pcpu->panic_stack + PAGE_SIZE;
+ lc->cpu_nr = cpu;
+#ifndef CONFIG_64BIT
+ if (MACHINE_HAS_IEEE) {
+ lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
+ if (!lc->extended_save_area_addr)
+ goto out;
+ }
+#else
+ if (vdso_alloc_per_cpu(lc))
+ goto out;
+#endif
+ lowcore_ptr[cpu] = lc;
+ pcpu_sigp_retry(pcpu, sigp_set_prefix, (u32)(unsigned long) lc);
+ return 0;
+out:
+ if (pcpu != &pcpu_devices[0]) {
+ free_page(pcpu->panic_stack);
+ free_pages(pcpu->async_stack, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+ }
+ return -ENOMEM;
}
-void smp_switch_to_ipl_cpu(void (*func)(void *), void *data)
+static void pcpu_free_lowcore(struct pcpu *pcpu)
{
- struct _lowcore *lc, *current_lc;
- struct stack_frame *sf;
- struct pt_regs *regs;
- unsigned long sp;
-
- if (smp_processor_id() == 0)
- func(data);
- __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE |
- PSW_MASK_EA | PSW_MASK_BA);
- /* Disable lowcore protection */
- __ctl_clear_bit(0, 28);
- current_lc = lowcore_ptr[smp_processor_id()];
- lc = lowcore_ptr[0];
- if (!lc)
- lc = current_lc;
- lc->restart_psw.mask =
- PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
- lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu;
- if (!cpu_online(0))
- smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]);
- while (sigp(0, sigp_stop_and_store_status) == sigp_busy)
- cpu_relax();
- sp = lc->panic_stack;
- sp -= sizeof(struct pt_regs);
- regs = (struct pt_regs *) sp;
- memcpy(&regs->gprs, &current_lc->gpregs_save_area, sizeof(regs->gprs));
- regs->psw = current_lc->psw_save_area;
- sp -= STACK_FRAME_OVERHEAD;
- sf = (struct stack_frame *) sp;
- sf->back_chain = 0;
- smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]);
+ pcpu_sigp_retry(pcpu, sigp_set_prefix, 0);
+ lowcore_ptr[pcpu - pcpu_devices] = NULL;
+#ifndef CONFIG_64BIT
+ if (MACHINE_HAS_IEEE) {
+ struct _lowcore *lc = pcpu->lowcore;
+
+ free_page((unsigned long) lc->extended_save_area_addr);
+ lc->extended_save_area_addr = 0;
+ }
+#else
+ vdso_free_per_cpu(pcpu->lowcore);
+#endif
+ if (pcpu != &pcpu_devices[0]) {
+ free_page(pcpu->panic_stack);
+ free_pages(pcpu->async_stack, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+ }
+}
+
+static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+
+ atomic_inc(&init_mm.context.attach_count);
+ lc->cpu_nr = cpu;
+ lc->percpu_offset = __per_cpu_offset[cpu];
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->machine_flags = S390_lowcore.machine_flags;
+ lc->ftrace_func = S390_lowcore.ftrace_func;
+ lc->user_timer = lc->system_timer = lc->steal_timer = 0;
+ __ctl_store(lc->cregs_save_area, 0, 15);
+ save_access_regs((unsigned int *) lc->access_regs_save_area);
+ memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+ MAX_FACILITY_BIT/8);
+}
+
+static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+ struct thread_info *ti = task_thread_info(tsk);
+
+ lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE;
+ lc->thread_info = (unsigned long) task_thread_info(tsk);
+ lc->current_task = (unsigned long) tsk;
+ lc->user_timer = ti->user_timer;
+ lc->system_timer = ti->system_timer;
+ lc->steal_timer = 0;
+}
+
+static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+
+ lc->restart_stack = lc->kernel_stack;
+ lc->restart_fn = (unsigned long) func;
+ lc->restart_data = (unsigned long) data;
+ lc->restart_source = -1UL;
+ pcpu_sigp_retry(pcpu, sigp_restart, 0);
+}
+
+/*
+ * Call function via PSW restart on pcpu and stop the current cpu.
+ */
+static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
+ void *data, unsigned long stack)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+ unsigned short this_cpu;
+
+ __load_psw_mask(psw_kernel_bits);
+ this_cpu = stap();
+ if (pcpu->address == this_cpu)
+ func(data); /* should not return */
+ /* Stop target cpu (if func returns this stops the current cpu). */
+ pcpu_sigp_retry(pcpu, sigp_stop, 0);
+ /* Restart func on the target cpu and stop the current cpu. */
+ lc->restart_stack = stack;
+ lc->restart_fn = (unsigned long) func;
+ lc->restart_data = (unsigned long) data;
+ lc->restart_source = (unsigned long) this_cpu;
+ asm volatile(
+ "0: sigp 0,%0,6 # sigp restart to target cpu\n"
+ " brc 2,0b # busy, try again\n"
+ "1: sigp 0,%1,5 # sigp stop to current cpu\n"
+ " brc 2,1b # busy, try again\n"
+ : : "d" (pcpu->address), "d" (this_cpu) : "0", "1", "cc");
+ for (;;) ;
+}
+
+/*
+ * Call function on an online CPU.
+ */
+void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+ struct pcpu *pcpu;
+
+ /* Use the current cpu if it is online. */
+ pcpu = pcpu_find_address(cpu_online_mask, stap());
+ if (!pcpu)
+ /* Use the first online cpu. */
+ pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
+ pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
}
-static void smp_stop_cpu(void)
+/*
+ * Call function on the ipl CPU.
+ */
+void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
- while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
+ pcpu_delegate(&pcpu_devices[0], func, data,
+ pcpu_devices->panic_stack + PAGE_SIZE);
+}
+
+int smp_find_processor_id(u16 address)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (pcpu_devices[cpu].address == address)
+ return cpu;
+ return -1;
+}
+
+int smp_vcpu_scheduled(int cpu)
+{
+ return pcpu_running(pcpu_devices + cpu);
+}
+
+void smp_yield(void)
+{
+ if (MACHINE_HAS_DIAG44)
+ asm volatile("diag 0,0,0x44");
+}
+
+void smp_yield_cpu(int cpu)
+{
+ if (MACHINE_HAS_DIAG9C)
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (pcpu_devices[cpu].address));
+ else if (MACHINE_HAS_DIAG44)
+ asm volatile("diag 0,0,0x44");
+}
+
+/*
+ * Send cpus emergency shutdown signal. This gives the cpus the
+ * opportunity to complete outstanding interrupts.
+ */
+void smp_emergency_stop(cpumask_t *cpumask)
+{
+ u64 end;
+ int cpu;
+
+ end = get_clock() + (1000000UL << 12);
+ for_each_cpu(cpu, cpumask) {
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ set_bit(ec_stop_cpu, &pcpu->ec_mask);
+ while (__pcpu_sigp(pcpu->address, sigp_emergency_signal,
+ 0, NULL) == sigp_busy &&
+ get_clock() < end)
+ cpu_relax();
+ }
+ while (get_clock() < end) {
+ for_each_cpu(cpu, cpumask)
+ if (pcpu_stopped(pcpu_devices + cpu))
+ cpumask_clear_cpu(cpu, cpumask);
+ if (cpumask_empty(cpumask))
+ break;
cpu_relax();
+ }
}
+/*
+ * Stop all cpus but the current one.
+ */
void smp_send_stop(void)
{
cpumask_t cpumask;
int cpu;
- u64 end;
/* Disable all interrupts/machine checks */
__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
trace_hardirqs_off();
+ debug_set_critical();
cpumask_copy(&cpumask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &cpumask);
- if (oops_in_progress) {
- /*
- * Give the other cpus the opportunity to complete
- * outstanding interrupts before stopping them.
- */
- end = get_clock() + (1000000UL << 12);
- for_each_cpu(cpu, &cpumask) {
- set_bit(ec_stop_cpu, (unsigned long *)
- &lowcore_ptr[cpu]->ext_call_fast);
- while (sigp(cpu, sigp_emergency_signal) == sigp_busy &&
- get_clock() < end)
- cpu_relax();
- }
- while (get_clock() < end) {
- for_each_cpu(cpu, &cpumask)
- if (cpu_stopped(cpu))
- cpumask_clear_cpu(cpu, &cpumask);
- if (cpumask_empty(&cpumask))
- break;
- cpu_relax();
- }
- }
+ if (oops_in_progress)
+ smp_emergency_stop(&cpumask);
/* stop all processors */
for_each_cpu(cpu, &cpumask) {
- while (sigp(cpu, sigp_stop) == sigp_busy)
- cpu_relax();
- while (!cpu_stopped(cpu))
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ pcpu_sigp_retry(pcpu, sigp_stop, 0);
+ while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
/*
+ * Stop the current cpu.
+ */
+void smp_stop_cpu(void)
+{
+ pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0);
+ for (;;) ;
+}
+
+/*
* This is the main routine where commands issued by other
* cpus are handled.
*/
-
-static void do_ext_call_interrupt(unsigned int ext_int_code,
+static void do_ext_call_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
unsigned long bits;
+ int cpu;
- if ((ext_int_code & 0xffff) == 0x1202)
- kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++;
+ cpu = smp_processor_id();
+ if (ext_code.code == 0x1202)
+ kstat_cpu(cpu).irqs[EXTINT_EXC]++;
else
- kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++;
+ kstat_cpu(cpu).irqs[EXTINT_EMS]++;
/*
* handle bit signal external calls
*/
- bits = xchg(&S390_lowcore.ext_call_fast, 0);
+ bits = xchg(&pcpu_devices[cpu].ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
@@ -238,38 +468,17 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
}
-/*
- * Send an external call sigp to another cpu and return without waiting
- * for its completion.
- */
-static void smp_ext_bitcall(int cpu, int sig)
-{
- int order;
-
- /*
- * Set signaling bit in lowcore of target cpu and kick it
- */
- set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast);
- while (1) {
- order = smp_vcpu_scheduled(cpu) ?
- sigp_external_call : sigp_emergency_signal;
- if (sigp(cpu, order) != sigp_busy)
- break;
- udelay(10);
- }
-}
-
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
int cpu;
for_each_cpu(cpu, mask)
- smp_ext_bitcall(cpu, ec_call_function);
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function);
}
void arch_send_call_function_single_ipi(int cpu)
{
- smp_ext_bitcall(cpu, ec_call_function_single);
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
}
#ifndef CONFIG_64BIT
@@ -295,15 +504,16 @@ EXPORT_SYMBOL(smp_ptlb_all);
*/
void smp_send_reschedule(int cpu)
{
- smp_ext_bitcall(cpu, ec_schedule);
+ pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
}
/*
* parameter area for the set/clear control bit callbacks
*/
struct ec_creg_mask_parms {
- unsigned long orvals[16];
- unsigned long andvals[16];
+ unsigned long orval;
+ unsigned long andval;
+ int cr;
};
/*
@@ -313,11 +523,9 @@ static void smp_ctl_bit_callback(void *info)
{
struct ec_creg_mask_parms *pp = info;
unsigned long cregs[16];
- int i;
__ctl_store(cregs, 0, 15);
- for (i = 0; i <= 15; i++)
- cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i];
+ cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
__ctl_load(cregs, 0, 15);
}
@@ -326,11 +534,8 @@ static void smp_ctl_bit_callback(void *info)
*/
void smp_ctl_set_bit(int cr, int bit)
{
- struct ec_creg_mask_parms parms;
+ struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
- memset(&parms.orvals, 0, sizeof(parms.orvals));
- memset(&parms.andvals, 0xff, sizeof(parms.andvals));
- parms.orvals[cr] = 1UL << bit;
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_set_bit);
@@ -340,226 +545,178 @@ EXPORT_SYMBOL(smp_ctl_set_bit);
*/
void smp_ctl_clear_bit(int cr, int bit)
{
- struct ec_creg_mask_parms parms;
+ struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
- memset(&parms.orvals, 0, sizeof(parms.orvals));
- memset(&parms.andvals, 0xff, sizeof(parms.andvals));
- parms.andvals[cr] = ~(1UL << bit);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_clear_bit);
#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
-static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu)
+struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
+EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
+
+static void __init smp_get_save_area(int cpu, u16 address)
{
- if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE)
- return;
+ void *lc = pcpu_devices[0].lowcore;
+ struct save_area *save_area;
+
if (is_kdump_kernel())
return;
+ if (!OLDMEM_BASE && (address == boot_cpu_address ||
+ ipl_info.type != IPL_TYPE_FCP_DUMP))
+ return;
if (cpu >= NR_CPUS) {
- pr_warning("CPU %i exceeds the maximum %i and is excluded from "
- "the dump\n", cpu, NR_CPUS - 1);
+ pr_warning("CPU %i exceeds the maximum %i and is excluded "
+ "from the dump\n", cpu, NR_CPUS - 1);
return;
}
- zfcpdump_save_areas[cpu] = kmalloc(sizeof(struct save_area), GFP_KERNEL);
- while (raw_sigp(phy_cpu, sigp_stop_and_store_status) == sigp_busy)
- cpu_relax();
- memcpy_real(zfcpdump_save_areas[cpu],
- (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE,
- sizeof(struct save_area));
+ save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL);
+ if (!save_area)
+ panic("could not allocate memory for save area\n");
+ zfcpdump_save_areas[cpu] = save_area;
+#ifdef CONFIG_CRASH_DUMP
+ if (address == boot_cpu_address) {
+ /* Copy the registers of the boot cpu. */
+ copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
+ SAVE_AREA_BASE - PAGE_SIZE, 0);
+ return;
+ }
+#endif
+ /* Get the registers of a non-boot cpu. */
+ __pcpu_sigp_relax(address, sigp_stop_and_store_status, 0, NULL);
+ memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
}
-struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
-EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
-
-#else
-
-static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { }
-
-#endif /* CONFIG_ZFCPDUMP */
-
-static int cpu_known(int cpu_id)
+int smp_store_status(int cpu)
{
- int cpu;
+ struct pcpu *pcpu;
- for_each_present_cpu(cpu) {
- if (__cpu_logical_map[cpu] == cpu_id)
- return 1;
- }
+ pcpu = pcpu_devices + cpu;
+ if (__pcpu_sigp_relax(pcpu->address, sigp_stop_and_store_status,
+ 0, NULL) != sigp_order_code_accepted)
+ return -EIO;
return 0;
}
-static int smp_rescan_cpus_sigp(cpumask_t avail)
-{
- int cpu_id, logical_cpu;
+#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
- logical_cpu = cpumask_first(&avail);
- if (logical_cpu >= nr_cpu_ids)
- return 0;
- for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) {
- if (cpu_known(cpu_id))
- continue;
- __cpu_logical_map[logical_cpu] = cpu_id;
- cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN);
- if (!cpu_stopped(logical_cpu))
- continue;
- set_cpu_present(logical_cpu, true);
- smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
- logical_cpu = cpumask_next(logical_cpu, &avail);
- if (logical_cpu >= nr_cpu_ids)
- break;
- }
- return 0;
-}
+static inline void smp_get_save_area(int cpu, u16 address) { }
+
+#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
-static int smp_rescan_cpus_sclp(cpumask_t avail)
+static struct sclp_cpu_info *smp_get_cpu_info(void)
{
+ static int use_sigp_detection;
struct sclp_cpu_info *info;
- int cpu_id, logical_cpu, cpu;
- int rc;
-
- logical_cpu = cpumask_first(&avail);
- if (logical_cpu >= nr_cpu_ids)
- return 0;
- info = kmalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
- rc = sclp_get_cpu_info(info);
- if (rc)
- goto out;
- for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
- continue;
- cpu_id = info->cpu[cpu].address;
- if (cpu_known(cpu_id))
- continue;
- __cpu_logical_map[logical_cpu] = cpu_id;
- cpu_set_polarization(logical_cpu, POLARIZATION_UNKNOWN);
- set_cpu_present(logical_cpu, true);
- if (cpu >= info->configured)
- smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
- else
- smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
- logical_cpu = cpumask_next(logical_cpu, &avail);
- if (logical_cpu >= nr_cpu_ids)
- break;
+ int address;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+ use_sigp_detection = 1;
+ for (address = 0; address <= MAX_CPU_ADDRESS; address++) {
+ if (__pcpu_sigp_relax(address, sigp_sense, 0, NULL) ==
+ sigp_not_operational)
+ continue;
+ info->cpu[info->configured].address = address;
+ info->configured++;
+ }
+ info->combined = info->configured;
}
-out:
- kfree(info);
- return rc;
+ return info;
}
-static int __smp_rescan_cpus(void)
+static int __devinit smp_add_present_cpu(int cpu);
+
+static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info,
+ int sysfs_add)
{
+ struct pcpu *pcpu;
cpumask_t avail;
+ int cpu, nr, i;
+ nr = 0;
cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
- if (smp_use_sigp_detection)
- return smp_rescan_cpus_sigp(avail);
- else
- return smp_rescan_cpus_sclp(avail);
+ cpu = cpumask_first(&avail);
+ for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+ if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ continue;
+ if (pcpu_find_address(cpu_present_mask, info->cpu[i].address))
+ continue;
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = info->cpu[i].address;
+ pcpu->state = (cpu >= info->configured) ?
+ CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+ cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
+ else
+ nr++;
+ cpu = cpumask_next(cpu, &avail);
+ }
+ return nr;
}
static void __init smp_detect_cpus(void)
{
unsigned int cpu, c_cpus, s_cpus;
struct sclp_cpu_info *info;
- u16 boot_cpu_addr, cpu_addr;
- c_cpus = 1;
- s_cpus = 0;
- boot_cpu_addr = __cpu_logical_map[0];
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = smp_get_cpu_info();
if (!info)
panic("smp_detect_cpus failed to allocate memory\n");
-#ifdef CONFIG_CRASH_DUMP
- if (OLDMEM_BASE && !is_kdump_kernel()) {
- struct save_area *save_area;
-
- save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
- if (!save_area)
- panic("could not allocate memory for save area\n");
- copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
- 0x200, 0);
- zfcpdump_save_areas[0] = save_area;
- }
-#endif
- /* Use sigp detection algorithm if sclp doesn't work. */
- if (sclp_get_cpu_info(info)) {
- smp_use_sigp_detection = 1;
- for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) {
- if (cpu == boot_cpu_addr)
- continue;
- if (!raw_cpu_stopped(cpu))
- continue;
- smp_get_save_area(c_cpus, cpu);
- c_cpus++;
- }
- goto out;
- }
-
if (info->has_cpu_type) {
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->cpu[cpu].address == boot_cpu_addr) {
- smp_cpu_type = info->cpu[cpu].type;
- break;
- }
+ if (info->cpu[cpu].address != boot_cpu_address)
+ continue;
+ /* The boot cpu dictates the cpu type. */
+ boot_cpu_type = info->cpu[cpu].type;
+ break;
}
}
-
+ c_cpus = s_cpus = 0;
for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
- continue;
- cpu_addr = info->cpu[cpu].address;
- if (cpu_addr == boot_cpu_addr)
+ if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
continue;
- if (!raw_cpu_stopped(cpu_addr)) {
+ if (cpu < info->configured) {
+ smp_get_save_area(c_cpus, info->cpu[cpu].address);
+ c_cpus++;
+ } else
s_cpus++;
- continue;
- }
- smp_get_save_area(c_cpus, cpu_addr);
- c_cpus++;
}
-out:
- kfree(info);
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
get_online_cpus();
- __smp_rescan_cpus();
+ __smp_rescan_cpus(info, 0);
put_online_cpus();
+ kfree(info);
}
/*
* Activate a secondary processor.
*/
-int __cpuinit start_secondary(void *cpuvoid)
+static void __cpuinit smp_start_secondary(void *cpuvoid)
{
+ S390_lowcore.last_update_clock = get_clock();
+ S390_lowcore.restart_stack = (unsigned long) restart_stack;
+ S390_lowcore.restart_fn = (unsigned long) do_restart;
+ S390_lowcore.restart_data = 0;
+ S390_lowcore.restart_source = -1UL;
+ restore_access_regs(S390_lowcore.access_regs_save_area);
+ __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
+ __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
cpu_init();
preempt_disable();
init_cpu_timer();
init_cpu_vtimer();
pfault_init();
-
notify_cpu_starting(smp_processor_id());
ipi_call_lock();
set_cpu_online(smp_processor_id(), true);
ipi_call_unlock();
- __ctl_clear_bit(0, 28); /* Disable lowcore protection */
- S390_lowcore.restart_psw.mask =
- PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
- S390_lowcore.restart_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
- __ctl_set_bit(0, 28); /* Enable lowcore protection */
- /*
- * Wait until the cpu which brought this one up marked it
- * active before enabling interrupts.
- */
- while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
- cpu_relax();
local_irq_enable();
/* cpu_idle will call schedule for us */
cpu_idle();
- return 0;
}
struct create_idle {
@@ -578,82 +735,20 @@ static void __cpuinit smp_fork_idle(struct work_struct *work)
complete(&c_idle->done);
}
-static int __cpuinit smp_alloc_lowcore(int cpu)
-{
- unsigned long async_stack, panic_stack;
- struct _lowcore *lowcore;
-
- lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
- if (!lowcore)
- return -ENOMEM;
- async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
- panic_stack = __get_free_page(GFP_KERNEL);
- if (!panic_stack || !async_stack)
- goto out;
- memcpy(lowcore, &S390_lowcore, 512);
- memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512);
- lowcore->async_stack = async_stack + ASYNC_SIZE;
- lowcore->panic_stack = panic_stack + PAGE_SIZE;
- lowcore->restart_psw.mask =
- PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
- lowcore->restart_psw.addr =
- PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
- if (user_mode != HOME_SPACE_MODE)
- lowcore->restart_psw.mask |= PSW_ASC_HOME;
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE) {
- unsigned long save_area;
-
- save_area = get_zeroed_page(GFP_KERNEL);
- if (!save_area)
- goto out;
- lowcore->extended_save_area_addr = (u32) save_area;
- }
-#else
- if (vdso_alloc_per_cpu(cpu, lowcore))
- goto out;
-#endif
- lowcore_ptr[cpu] = lowcore;
- return 0;
-
-out:
- free_page(panic_stack);
- free_pages(async_stack, ASYNC_ORDER);
- free_pages((unsigned long) lowcore, LC_ORDER);
- return -ENOMEM;
-}
-
-static void smp_free_lowcore(int cpu)
-{
- struct _lowcore *lowcore;
-
- lowcore = lowcore_ptr[cpu];
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE)
- free_page((unsigned long) lowcore->extended_save_area_addr);
-#else
- vdso_free_per_cpu(cpu, lowcore);
-#endif
- free_page(lowcore->panic_stack - PAGE_SIZE);
- free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
- free_pages((unsigned long) lowcore, LC_ORDER);
- lowcore_ptr[cpu] = NULL;
-}
-
/* Upping and downing of CPUs */
int __cpuinit __cpu_up(unsigned int cpu)
{
- struct _lowcore *cpu_lowcore;
struct create_idle c_idle;
- struct task_struct *idle;
- struct stack_frame *sf;
- u32 lowcore;
- int ccode;
+ struct pcpu *pcpu;
+ int rc;
- if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED)
+ pcpu = pcpu_devices + cpu;
+ if (pcpu->state != CPU_STATE_CONFIGURED)
+ return -EIO;
+ if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) !=
+ sigp_order_code_accepted)
return -EIO;
- idle = current_set[cpu];
- if (!idle) {
+ if (!pcpu->idle) {
c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done);
INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle);
c_idle.cpu = cpu;
@@ -661,68 +756,28 @@ int __cpuinit __cpu_up(unsigned int cpu)
wait_for_completion(&c_idle.done);
if (IS_ERR(c_idle.idle))
return PTR_ERR(c_idle.idle);
- idle = c_idle.idle;
- current_set[cpu] = c_idle.idle;
+ pcpu->idle = c_idle.idle;
}
- init_idle(idle, cpu);
- if (smp_alloc_lowcore(cpu))
- return -ENOMEM;
- do {
- ccode = sigp(cpu, sigp_initial_cpu_reset);
- if (ccode == sigp_busy)
- udelay(10);
- if (ccode == sigp_not_operational)
- goto err_out;
- } while (ccode == sigp_busy);
-
- lowcore = (u32)(unsigned long)lowcore_ptr[cpu];
- while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy)
- udelay(10);
-
- cpu_lowcore = lowcore_ptr[cpu];
- cpu_lowcore->kernel_stack = (unsigned long)
- task_stack_page(idle) + THREAD_SIZE;
- cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle);
- sf = (struct stack_frame *) (cpu_lowcore->kernel_stack
- - sizeof(struct pt_regs)
- - sizeof(struct stack_frame));
- memset(sf, 0, sizeof(struct stack_frame));
- sf->gprs[9] = (unsigned long) sf;
- cpu_lowcore->gpregs_save_area[15] = (unsigned long) sf;
- __ctl_store(cpu_lowcore->cregs_save_area, 0, 15);
- atomic_inc(&init_mm.context.attach_count);
- asm volatile(
- " stam 0,15,0(%0)"
- : : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
- cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
- cpu_lowcore->current_task = (unsigned long) idle;
- cpu_lowcore->cpu_nr = cpu;
- cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce;
- cpu_lowcore->machine_flags = S390_lowcore.machine_flags;
- cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func;
- memcpy(cpu_lowcore->stfle_fac_list, S390_lowcore.stfle_fac_list,
- MAX_FACILITY_BIT/8);
- eieio();
-
- while (sigp(cpu, sigp_restart) == sigp_busy)
- udelay(10);
-
+ init_idle(pcpu->idle, cpu);
+ rc = pcpu_alloc_lowcore(pcpu, cpu);
+ if (rc)
+ return rc;
+ pcpu_prepare_secondary(pcpu, cpu);
+ pcpu_attach_task(pcpu, pcpu->idle);
+ pcpu_start_fn(pcpu, smp_start_secondary, NULL);
while (!cpu_online(cpu))
cpu_relax();
return 0;
-
-err_out:
- smp_free_lowcore(cpu);
- return -EIO;
}
static int __init setup_possible_cpus(char *s)
{
- int pcpus, cpu;
+ int max, cpu;
- pcpus = simple_strtoul(s, NULL, 0);
+ if (kstrtoint(s, 0, &max) < 0)
+ return 0;
init_cpu_possible(cpumask_of(0));
- for (cpu = 1; cpu < pcpus && cpu < nr_cpu_ids; cpu++)
+ for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
set_cpu_possible(cpu, true);
return 0;
}
@@ -732,113 +787,79 @@ early_param("possible_cpus", setup_possible_cpus);
int __cpu_disable(void)
{
- struct ec_creg_mask_parms cr_parms;
- int cpu = smp_processor_id();
-
- set_cpu_online(cpu, false);
+ unsigned long cregs[16];
- /* Disable pfault pseudo page faults on this cpu. */
+ set_cpu_online(smp_processor_id(), false);
+ /* Disable pseudo page faults on this cpu. */
pfault_fini();
-
- memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals));
- memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals));
-
- /* disable all external interrupts */
- cr_parms.orvals[0] = 0;
- cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 11 |
- 1 << 10 | 1 << 9 | 1 << 6 | 1 << 5 |
- 1 << 4);
- /* disable all I/O interrupts */
- cr_parms.orvals[6] = 0;
- cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 |
- 1 << 27 | 1 << 26 | 1 << 25 | 1 << 24);
- /* disable most machine checks */
- cr_parms.orvals[14] = 0;
- cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 |
- 1 << 25 | 1 << 24);
-
- smp_ctl_bit_callback(&cr_parms);
-
+ /* Disable interrupt sources via control register. */
+ __ctl_store(cregs, 0, 15);
+ cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */
+ cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
+ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
+ __ctl_load(cregs, 0, 15);
return 0;
}
void __cpu_die(unsigned int cpu)
{
+ struct pcpu *pcpu;
+
/* Wait until target cpu is down */
- while (!cpu_stopped(cpu))
+ pcpu = pcpu_devices + cpu;
+ while (!pcpu_stopped(pcpu))
cpu_relax();
- while (sigp_p(0, cpu, sigp_set_prefix) == sigp_busy)
- udelay(10);
- smp_free_lowcore(cpu);
+ pcpu_free_lowcore(pcpu);
atomic_dec(&init_mm.context.attach_count);
}
void __noreturn cpu_die(void)
{
idle_task_exit();
- while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
- cpu_relax();
- for (;;);
+ pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0);
+ for (;;) ;
}
#endif /* CONFIG_HOTPLUG_CPU */
-void __init smp_prepare_cpus(unsigned int max_cpus)
+static void smp_call_os_info_init_fn(void)
{
-#ifndef CONFIG_64BIT
- unsigned long save_area = 0;
-#endif
- unsigned long async_stack, panic_stack;
- struct _lowcore *lowcore;
+ int (*init_fn)(void);
+ unsigned long size;
- smp_detect_cpus();
+ init_fn = os_info_old_entry(OS_INFO_INIT_FN, &size);
+ if (!init_fn)
+ return;
+ init_fn();
+}
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
/* request the 0x1201 emergency signal external interrupt */
if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
panic("Couldn't request external interrupt 0x1201");
/* request the 0x1202 external call external interrupt */
if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
panic("Couldn't request external interrupt 0x1202");
-
- /* Reallocate current lowcore, but keep its contents. */
- lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
- panic_stack = __get_free_page(GFP_KERNEL);
- async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
- BUG_ON(!lowcore || !panic_stack || !async_stack);
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE)
- save_area = get_zeroed_page(GFP_KERNEL);
-#endif
- local_irq_disable();
- local_mcck_disable();
- lowcore_ptr[smp_processor_id()] = lowcore;
- *lowcore = S390_lowcore;
- lowcore->panic_stack = panic_stack + PAGE_SIZE;
- lowcore->async_stack = async_stack + ASYNC_SIZE;
-#ifndef CONFIG_64BIT
- if (MACHINE_HAS_IEEE)
- lowcore->extended_save_area_addr = (u32) save_area;
-#endif
- set_prefix((u32)(unsigned long) lowcore);
- local_mcck_enable();
- local_irq_enable();
-#ifdef CONFIG_64BIT
- if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore))
- BUG();
-#endif
+ smp_call_os_info_init_fn();
+ smp_detect_cpus();
}
void __init smp_prepare_boot_cpu(void)
{
- BUG_ON(smp_processor_id() != 0);
-
- current_thread_info()->cpu = 0;
- set_cpu_present(0, true);
- set_cpu_online(0, true);
+ struct pcpu *pcpu = pcpu_devices;
+
+ boot_cpu_address = stap();
+ pcpu->idle = current;
+ pcpu->state = CPU_STATE_CONFIGURED;
+ pcpu->address = boot_cpu_address;
+ pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
+ pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE;
+ pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE;
S390_lowcore.percpu_offset = __per_cpu_offset[0];
- current_set[0] = current;
- smp_cpu_state[0] = CPU_STATE_CONFIGURED;
cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ set_cpu_present(0, true);
+ set_cpu_online(0, true);
}
void __init smp_cpus_done(unsigned int max_cpus)
@@ -848,7 +869,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
void __init smp_setup_processor_id(void)
{
S390_lowcore.cpu_nr = 0;
- __cpu_logical_map[0] = stap();
}
/*
@@ -864,56 +884,57 @@ int setup_profiling_timer(unsigned int multiplier)
#ifdef CONFIG_HOTPLUG_CPU
static ssize_t cpu_configure_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]);
+ count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static ssize_t cpu_configure_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
- int cpu = dev->id;
- int val, rc;
+ struct pcpu *pcpu;
+ int cpu, val, rc;
char delim;
if (sscanf(buf, "%d %c", &val, &delim) != 1)
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
-
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
rc = -EBUSY;
/* disallow configuration changes of online cpus and cpu 0 */
+ cpu = dev->id;
if (cpu_online(cpu) || cpu == 0)
goto out;
+ pcpu = pcpu_devices + cpu;
rc = 0;
switch (val) {
case 0:
- if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
- rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
- if (!rc) {
- smp_cpu_state[cpu] = CPU_STATE_STANDBY;
- cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
- topology_expect_change();
- }
- }
+ if (pcpu->state != CPU_STATE_CONFIGURED)
+ break;
+ rc = sclp_cpu_deconfigure(pcpu->address);
+ if (rc)
+ break;
+ pcpu->state = CPU_STATE_STANDBY;
+ cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ topology_expect_change();
break;
case 1:
- if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
- rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
- if (!rc) {
- smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
- cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
- topology_expect_change();
- }
- }
+ if (pcpu->state != CPU_STATE_STANDBY)
+ break;
+ rc = sclp_cpu_configure(pcpu->address);
+ if (rc)
+ break;
+ pcpu->state = CPU_STATE_CONFIGURED;
+ cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ topology_expect_change();
break;
default:
break;
@@ -929,7 +950,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
+ return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
@@ -961,22 +982,16 @@ static DEVICE_ATTR(capability, 0444, show_capability, NULL);
static ssize_t show_idle_count(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct s390_idle_data *idle;
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
unsigned long long idle_count;
unsigned int sequence;
- idle = &per_cpu(s390_idle, dev->id);
-repeat:
- sequence = idle->sequence;
- smp_rmb();
- if (sequence & 1)
- goto repeat;
- idle_count = idle->idle_count;
- if (idle->idle_enter)
- idle_count++;
- smp_rmb();
- if (idle->sequence != sequence)
- goto repeat;
+ do {
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_count = ACCESS_ONCE(idle->idle_count);
+ if (ACCESS_ONCE(idle->idle_enter))
+ idle_count++;
+ } while ((sequence & 1) || (idle->sequence != sequence));
return sprintf(buf, "%llu\n", idle_count);
}
static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -984,24 +999,18 @@ static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct s390_idle_data *idle;
- unsigned long long now, idle_time, idle_enter;
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned long long now, idle_time, idle_enter, idle_exit;
unsigned int sequence;
- idle = &per_cpu(s390_idle, dev->id);
- now = get_clock();
-repeat:
- sequence = idle->sequence;
- smp_rmb();
- if (sequence & 1)
- goto repeat;
- idle_time = idle->idle_time;
- idle_enter = idle->idle_enter;
- if (idle_enter != 0ULL && idle_enter < now)
- idle_time += now - idle_enter;
- smp_rmb();
- if (idle->sequence != sequence)
- goto repeat;
+ do {
+ now = get_clock();
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_time = ACCESS_ONCE(idle->idle_time);
+ idle_enter = ACCESS_ONCE(idle->idle_enter);
+ idle_exit = ACCESS_ONCE(idle->idle_exit);
+ } while ((sequence & 1) || (idle->sequence != sequence));
+ idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
return sprintf(buf, "%llu\n", idle_time >> 12);
}
static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
@@ -1021,7 +1030,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
- struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct cpu *c = &pcpu_devices[cpu].cpu;
struct device *s = &c->dev;
struct s390_idle_data *idle;
int err = 0;
@@ -1047,7 +1056,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = {
static int __devinit smp_add_present_cpu(int cpu)
{
- struct cpu *c = &per_cpu(cpu_devices, cpu);
+ struct cpu *c = &pcpu_devices[cpu].cpu;
struct device *s = &c->dev;
int rc;
@@ -1085,29 +1094,21 @@ out:
int __ref smp_rescan_cpus(void)
{
- cpumask_t newcpus;
- int cpu;
- int rc;
+ struct sclp_cpu_info *info;
+ int nr;
+ info = smp_get_cpu_info();
+ if (!info)
+ return -ENOMEM;
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
- cpumask_copy(&newcpus, cpu_present_mask);
- rc = __smp_rescan_cpus();
- if (rc)
- goto out;
- cpumask_andnot(&newcpus, cpu_present_mask, &newcpus);
- for_each_cpu(cpu, &newcpus) {
- rc = smp_add_present_cpu(cpu);
- if (rc)
- set_cpu_present(cpu, false);
- }
- rc = 0;
-out:
+ nr = __smp_rescan_cpus(info, 1);
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
- if (!cpumask_empty(&newcpus))
+ kfree(info);
+ if (nr)
topology_schedule_update();
- return rc;
+ return 0;
}
static ssize_t __ref rescan_store(struct device *dev,
diff --git a/arch/s390/kernel/switch_cpu.S b/arch/s390/kernel/switch_cpu.S
deleted file mode 100644
index bfe070bc765..00000000000
--- a/arch/s390/kernel/switch_cpu.S
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * 31-bit switch cpu code
- *
- * Copyright IBM Corp. 2009
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/ptrace.h>
-
-# smp_switch_to_cpu switches to destination cpu and executes the passed function
-# Parameter: %r2 - function to call
-# %r3 - function parameter
-# %r4 - stack poiner
-# %r5 - current cpu
-# %r6 - destination cpu
-
- .section .text
-ENTRY(smp_switch_to_cpu)
- stm %r6,%r15,__SF_GPRS(%r15)
- lr %r1,%r15
- ahi %r15,-STACK_FRAME_OVERHEAD
- st %r1,__SF_BACKCHAIN(%r15)
- basr %r13,0
-0: la %r1,.gprregs_addr-0b(%r13)
- l %r1,0(%r1)
- stm %r0,%r15,0(%r1)
-1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */
- brc 2,1b /* busy, try again */
-2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */
- brc 2,2b /* busy, try again */
-3: j 3b
-
-ENTRY(smp_restart_cpu)
- basr %r13,0
-0: la %r1,.gprregs_addr-0b(%r13)
- l %r1,0(%r1)
- lm %r0,%r15,0(%r1)
-1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */
- brc 10,1b /* busy, accepted (status 0), running */
- tmll %r0,0x40 /* Test if calling CPU is stopped */
- jz 1b
- ltr %r4,%r4 /* New stack ? */
- jz 1f
- lr %r15,%r4
-1: lr %r14,%r2 /* r14: Function to call */
- lr %r2,%r3 /* r2 : Parameter for function*/
- basr %r14,%r14 /* Call function */
-
-.gprregs_addr:
- .long .gprregs
-
- .section .data,"aw",@progbits
-.gprregs:
- .rept 16
- .long 0
- .endr
diff --git a/arch/s390/kernel/switch_cpu64.S b/arch/s390/kernel/switch_cpu64.S
deleted file mode 100644
index fcc42d799e4..00000000000
--- a/arch/s390/kernel/switch_cpu64.S
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * 64-bit switch cpu code
- *
- * Copyright IBM Corp. 2009
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/ptrace.h>
-
-# smp_switch_to_cpu switches to destination cpu and executes the passed function
-# Parameter: %r2 - function to call
-# %r3 - function parameter
-# %r4 - stack poiner
-# %r5 - current cpu
-# %r6 - destination cpu
-
- .section .text
-ENTRY(smp_switch_to_cpu)
- stmg %r6,%r15,__SF_GPRS(%r15)
- lgr %r1,%r15
- aghi %r15,-STACK_FRAME_OVERHEAD
- stg %r1,__SF_BACKCHAIN(%r15)
- larl %r1,.gprregs
- stmg %r0,%r15,0(%r1)
-1: sigp %r0,%r6,__SIGP_RESTART /* start destination CPU */
- brc 2,1b /* busy, try again */
-2: sigp %r0,%r5,__SIGP_STOP /* stop current CPU */
- brc 2,2b /* busy, try again */
-3: j 3b
-
-ENTRY(smp_restart_cpu)
- larl %r1,.gprregs
- lmg %r0,%r15,0(%r1)
-1: sigp %r0,%r5,__SIGP_SENSE /* Wait for calling CPU */
- brc 10,1b /* busy, accepted (status 0), running */
- tmll %r0,0x40 /* Test if calling CPU is stopped */
- jz 1b
- ltgr %r4,%r4 /* New stack ? */
- jz 1f
- lgr %r15,%r4
-1: lgr %r14,%r2 /* r14: Function to call */
- lgr %r2,%r3 /* r2 : Parameter for function*/
- basr %r14,%r14 /* Call function */
-
- .section .data,"aw",@progbits
-.gprregs:
- .rept 16
- .quad 0
- .endr
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index acb78cdee89..dd70ef04605 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -42,7 +42,7 @@ ENTRY(swsusp_arch_suspend)
lghi %r1,0x1000
/* Save CPU address */
- stap __LC_CPU_ADDRESS(%r0)
+ stap __LC_EXT_CPU_ADDR(%r0)
/* Store registers */
mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
@@ -173,15 +173,15 @@ pgm_check_entry:
larl %r1,.Lresume_cpu /* Resume CPU address: r2 */
stap 0(%r1)
llgh %r2,0(%r1)
- llgh %r1,__LC_CPU_ADDRESS(%r0) /* Suspend CPU address: r1 */
+ llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */
cgr %r1,%r2
je restore_registers /* r1 = r2 -> nothing to do */
larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */
mvc __LC_RST_NEW_PSW(16,%r0),0(%r4)
3:
- sigp %r9,%r1,__SIGP_INITIAL_CPU_RESET
- brc 8,4f /* accepted */
- brc 2,3b /* busy, try again */
+ sigp %r9,%r1,11 /* sigp initial cpu reset */
+ brc 8,4f /* accepted */
+ brc 2,3b /* busy, try again */
/* Suspend CPU not available -> panic */
larl %r15,init_thread_union
@@ -196,10 +196,10 @@ pgm_check_entry:
lpsw 0(%r3)
4:
/* Switch to suspend CPU */
- sigp %r9,%r1,__SIGP_RESTART /* start suspend CPU */
+ sigp %r9,%r1,6 /* sigp restart to suspend CPU */
brc 2,4b /* busy, try again */
5:
- sigp %r9,%r2,__SIGP_STOP /* stop resume (current) CPU */
+ sigp %r9,%r2,5 /* sigp stop to current resume CPU */
brc 2,5b /* busy, try again */
6: j 6b
@@ -207,7 +207,7 @@ restart_suspend:
larl %r1,.Lresume_cpu
llgh %r2,0(%r1)
7:
- sigp %r9,%r2,__SIGP_SENSE /* Wait for resume CPU */
+ sigp %r9,%r2,1 /* sigp sense, wait for resume CPU */
brc 8,7b /* accepted, status 0, still running */
brc 2,7b /* busy, try again */
tmll %r9,0x40 /* Test if resume CPU is stopped */
@@ -257,6 +257,9 @@ restore_registers:
lghi %r2,0
brasl %r14,arch_set_page_states
+ /* Log potential guest relocation */
+ brasl %r14,lgr_info_log
+
/* Reinitialize the channel subsystem */
brasl %r14,channel_subsystem_reinit
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 14da278febb..d4e1cb1dbcd 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -165,7 +165,7 @@ void init_cpu_timer(void)
__ctl_set_bit(0, 4);
}
-static void clock_comparator_interrupt(unsigned int ext_int_code,
+static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned int param32,
unsigned long param64)
{
@@ -177,7 +177,7 @@ static void clock_comparator_interrupt(unsigned int ext_int_code,
static void etr_timing_alert(struct etr_irq_parm *);
static void stp_timing_alert(struct stp_irq_parm *);
-static void timing_alert_interrupt(unsigned int ext_int_code,
+static void timing_alert_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 7370a41948c..4f8dc942257 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -79,12 +79,12 @@ static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu,
cpu < TOPOLOGY_CPU_BITS;
cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1))
{
- unsigned int rcpu, lcpu;
+ unsigned int rcpu;
+ int lcpu;
rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin;
- for_each_present_cpu(lcpu) {
- if (cpu_logical_map(lcpu) != rcpu)
- continue;
+ lcpu = smp_find_processor_id(rcpu);
+ if (lcpu >= 0) {
cpumask_set_cpu(lcpu, &book->mask);
cpu_book_id[lcpu] = book->id;
cpumask_set_cpu(lcpu, &core->mask);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 5ce3750b181..cd6ebe12c48 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -41,6 +41,7 @@
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/debug.h>
+#include <asm/ipl.h>
#include "entry.h"
void (*pgm_check_table[128])(struct pt_regs *regs);
@@ -144,8 +145,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
for (i = 0; i < kstack_depth_to_print; i++) {
if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
break;
- if (i && ((i * sizeof (long) % 32) == 0))
- printk("\n ");
+ if ((i * sizeof(long) % 32) == 0)
+ printk("%s ", i == 0 ? "" : "\n");
printk(LONG, *stack++);
}
printk("\n");
@@ -239,6 +240,7 @@ void die(struct pt_regs *regs, const char *str)
static int die_counter;
oops_enter();
+ lgr_info_log();
debug_stop_all();
console_verbose();
spin_lock_irq(&die_lock);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index d73630b4fe1..9c80138206b 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -89,18 +89,11 @@ static void vdso_init_data(struct vdso_data *vd)
#ifdef CONFIG_64BIT
/*
- * Setup per cpu vdso data page.
- */
-static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
-{
-}
-
-/*
* Allocate/free per cpu vdso data.
*/
#define SEGMENT_ORDER 2
-int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
+int vdso_alloc_per_cpu(struct _lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
@@ -139,7 +132,6 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
aste[4] = (u32)(addr_t) psal;
lowcore->vdso_per_cpu_data = page_frame;
- vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
return 0;
out:
@@ -149,7 +141,7 @@ out:
return -ENOMEM;
}
-void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
+void vdso_free_per_cpu(struct _lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
u32 *psal, *aste;
@@ -168,19 +160,15 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
free_pages(segment_table, SEGMENT_ORDER);
}
-static void __vdso_init_cr5(void *dummy)
+static void vdso_init_cr5(void)
{
unsigned long cr5;
+ if (user_mode == HOME_SPACE_MODE || !vdso_enabled)
+ return;
cr5 = offsetof(struct _lowcore, paste);
__ctl_load(cr5, 5, 5);
}
-
-static void vdso_init_cr5(void)
-{
- if (user_mode != HOME_SPACE_MODE && vdso_enabled)
- on_each_cpu(__vdso_init_cr5, NULL, 1);
-}
#endif /* CONFIG_64BIT */
/*
@@ -253,17 +241,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* on the "data" page of the vDSO or you'll stop getting kernel
* updates and your nice userland gettimeofday will be totally dead.
* It's fine to use that for setting breakpoints in the vDSO code
- * pages though
- *
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to see
- * what PC values meant.
+ * pages though.
*/
rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso_pagelist);
if (rc)
current->mm->context.vdso_base = 0;
@@ -322,10 +304,8 @@ static int __init vdso_init(void)
}
vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
vdso64_pagelist[vdso64_pages] = NULL;
-#ifndef CONFIG_SMP
- if (vdso_alloc_per_cpu(0, &S390_lowcore))
+ if (vdso_alloc_per_cpu(&S390_lowcore))
BUG();
-#endif
vdso_init_cr5();
#endif /* CONFIG_64BIT */
@@ -335,7 +315,7 @@ static int __init vdso_init(void)
return 0;
}
-arch_initcall(vdso_init);
+early_initcall(vdso_init);
int in_gate_area_no_mm(unsigned long addr)
{
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index bb48977f546..39ebff50694 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -26,6 +26,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/irq.h>
+#include "entry.h"
static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
@@ -123,153 +124,53 @@ void account_system_vtime(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(account_system_vtime);
-void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer)
+void __kprobes vtime_stop_cpu(void)
{
struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
- __u64 idle_time, expires;
+ unsigned long long idle_time;
+ unsigned long psw_mask;
- if (idle->idle_enter == 0ULL)
- return;
+ trace_hardirqs_on();
+ /* Don't trace preempt off for idle. */
+ stop_critical_timings();
- /* Account time spent with enabled wait psw loaded as idle time. */
- idle_time = int_clock - idle->idle_enter;
- account_idle_time(idle_time);
- S390_lowcore.steal_timer +=
- idle->idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = int_clock;
-
- /* Account system time spent going idle. */
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
- S390_lowcore.last_update_timer = enter_timer;
-
- /* Restart vtime CPU timer */
- if (vq->do_spt) {
- /* Program old expire value but first save progress. */
- expires = vq->idle - enter_timer;
- expires += get_vtimer();
- set_vtimer(expires);
- } else {
- /* Don't account the CPU timer delta while the cpu was idle. */
- vq->elapsed -= vq->idle - enter_timer;
- }
+ /* Wait for external, I/O or machine check interrupt. */
+ psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ idle->nohz_delay = 0;
+ /* Call the assembler magic in entry.S */
+ psw_idle(idle, vq, psw_mask, !list_empty(&vq->list));
+
+ /* Reenable preemption tracer. */
+ start_critical_timings();
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
idle->sequence++;
smp_wmb();
+ idle_time = idle->idle_exit - idle->idle_enter;
idle->idle_time += idle_time;
- idle->idle_enter = 0ULL;
+ idle->idle_enter = idle->idle_exit = 0ULL;
idle->idle_count++;
+ account_idle_time(idle_time);
smp_wmb();
idle->sequence++;
}
-void __kprobes vtime_stop_cpu(void)
-{
- struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
- struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
- psw_t psw;
-
- /* Wait for external, I/O or machine check interrupt. */
- psw.mask = psw_kernel_bits | PSW_MASK_WAIT |
- PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-
- idle->nohz_delay = 0;
-
- /* Check if the CPU timer needs to be reprogrammed. */
- if (vq->do_spt) {
- __u64 vmax = VTIMER_MAX_SLICE;
- /*
- * The inline assembly is equivalent to
- * vq->idle = get_cpu_timer();
- * set_cpu_timer(VTIMER_MAX_SLICE);
- * idle->idle_enter = get_clock();
- * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
- * PSW_MASK_DAT | PSW_MASK_IO |
- * PSW_MASK_EXT | PSW_MASK_MCHECK);
- * The difference is that the inline assembly makes sure that
- * the last three instruction are stpt, stck and lpsw in that
- * order. This is done to increase the precision.
- */
- asm volatile(
-#ifndef CONFIG_64BIT
- " basr 1,0\n"
- "0: ahi 1,1f-0b\n"
- " st 1,4(%2)\n"
-#else /* CONFIG_64BIT */
- " larl 1,1f\n"
- " stg 1,8(%2)\n"
-#endif /* CONFIG_64BIT */
- " stpt 0(%4)\n"
- " spt 0(%5)\n"
- " stck 0(%3)\n"
-#ifndef CONFIG_64BIT
- " lpsw 0(%2)\n"
-#else /* CONFIG_64BIT */
- " lpswe 0(%2)\n"
-#endif /* CONFIG_64BIT */
- "1:"
- : "=m" (idle->idle_enter), "=m" (vq->idle)
- : "a" (&psw), "a" (&idle->idle_enter),
- "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
- : "memory", "cc", "1");
- } else {
- /*
- * The inline assembly is equivalent to
- * vq->idle = get_cpu_timer();
- * idle->idle_enter = get_clock();
- * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
- * PSW_MASK_DAT | PSW_MASK_IO |
- * PSW_MASK_EXT | PSW_MASK_MCHECK);
- * The difference is that the inline assembly makes sure that
- * the last three instruction are stpt, stck and lpsw in that
- * order. This is done to increase the precision.
- */
- asm volatile(
-#ifndef CONFIG_64BIT
- " basr 1,0\n"
- "0: ahi 1,1f-0b\n"
- " st 1,4(%2)\n"
-#else /* CONFIG_64BIT */
- " larl 1,1f\n"
- " stg 1,8(%2)\n"
-#endif /* CONFIG_64BIT */
- " stpt 0(%4)\n"
- " stck 0(%3)\n"
-#ifndef CONFIG_64BIT
- " lpsw 0(%2)\n"
-#else /* CONFIG_64BIT */
- " lpswe 0(%2)\n"
-#endif /* CONFIG_64BIT */
- "1:"
- : "=m" (idle->idle_enter), "=m" (vq->idle)
- : "a" (&psw), "a" (&idle->idle_enter),
- "a" (&vq->idle), "m" (psw)
- : "memory", "cc", "1");
- }
-}
-
cputime64_t s390_get_idle_time(int cpu)
{
- struct s390_idle_data *idle;
- unsigned long long now, idle_time, idle_enter;
+ struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+ unsigned long long now, idle_enter, idle_exit;
unsigned int sequence;
- idle = &per_cpu(s390_idle, cpu);
-
- now = get_clock();
-repeat:
- sequence = idle->sequence;
- smp_rmb();
- if (sequence & 1)
- goto repeat;
- idle_time = 0;
- idle_enter = idle->idle_enter;
- if (idle_enter != 0ULL && idle_enter < now)
- idle_time = now - idle_enter;
- smp_rmb();
- if (idle->sequence != sequence)
- goto repeat;
- return idle_time;
+ do {
+ now = get_clock();
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_enter = ACCESS_ONCE(idle->idle_enter);
+ idle_exit = ACCESS_ONCE(idle->idle_exit);
+ } while ((sequence & 1) || (idle->sequence != sequence));
+ return idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
}
/*
@@ -319,7 +220,7 @@ static void do_callbacks(struct list_head *cb_list)
/*
* Handler for the virtual CPU timer.
*/
-static void do_cpu_timer_interrupt(unsigned int ext_int_code,
+static void do_cpu_timer_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
struct vtimer_queue *vq;
@@ -346,7 +247,6 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code,
}
spin_unlock(&vq->lock);
- vq->do_spt = list_empty(&cb_list);
do_callbacks(&cb_list);
/* next event is first in list */
@@ -355,8 +255,7 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code,
if (!list_empty(&vq->list)) {
event = list_first_entry(&vq->list, struct vtimer_list, entry);
next = event->expires;
- } else
- vq->do_spt = 0;
+ }
spin_unlock(&vq->lock);
/*
* To improve precision add the time spent by the
@@ -570,6 +469,9 @@ void init_cpu_vtimer(void)
/* enable cpu timer interrupts */
__ctl_set_bit(0,10);
+
+ /* set initial cpu timer */
+ set_vtimer(0x7fffffffffffffffULL);
}
static int __cpuinit s390_nohz_notify(struct notifier_block *self,