diff options
Diffstat (limited to 'arch/s390/kernel/smp.c')
-rw-r--r-- | arch/s390/kernel/smp.c | 315 |
1 files changed, 232 insertions, 83 deletions
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0b499f5cbe1..db8f1115a3b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -59,21 +59,41 @@ enum { CPU_STATE_CONFIGURED, }; +static DEFINE_PER_CPU(struct cpu *, cpu_device); + struct pcpu { - struct cpu *cpu; struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ - unsigned long async_stack; /* async stack for the cpu */ - unsigned long panic_stack; /* panic stack for the cpu */ unsigned long ec_mask; /* bit mask for ec_xxx functions */ - int state; /* physical cpu state */ - int polarization; /* physical polarization */ + signed char state; /* physical cpu state */ + signed char polarization; /* physical polarization */ u16 address; /* physical cpu address */ }; static u8 boot_cpu_type; -static u16 boot_cpu_address; static struct pcpu pcpu_devices[NR_CPUS]; +unsigned int smp_cpu_mt_shift; +EXPORT_SYMBOL(smp_cpu_mt_shift); + +unsigned int smp_cpu_mtid; +EXPORT_SYMBOL(smp_cpu_mtid); + +static unsigned int smp_max_threads __initdata = -1U; + +static int __init early_nosmt(char *s) +{ + smp_max_threads = 1; + return 0; +} +early_param("nosmt", early_nosmt); + +static int __init early_smt(char *s) +{ + get_option(&s, &smp_max_threads); + return 0; +} +early_param("smt", early_smt); + /* * The smp_cpu_state_mutex must be held when changing the state or polarization * member of a pcpu data structure within the pcpu_devices arreay. @@ -132,7 +152,7 @@ static inline int pcpu_running(struct pcpu *pcpu) /* * Find struct pcpu by cpu address. */ -static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) { int cpu; @@ -152,25 +172,30 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) pcpu_sigp_retry(pcpu, order, 0); } +#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) +#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) + static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { + unsigned long async_stack, panic_stack; struct _lowcore *lc; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct _lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); - pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); - pcpu->panic_stack = __get_free_page(GFP_KERNEL); - if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack) + async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + panic_stack = __get_free_page(GFP_KERNEL); + if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + } else { + async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; + panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); - lc->async_stack = pcpu->async_stack + ASYNC_SIZE - - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); - lc->panic_stack = pcpu->panic_stack + PAGE_SIZE - - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); + lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; + lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); #ifndef CONFIG_64BIT @@ -191,8 +216,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return 0; out: if (pcpu != &pcpu_devices[0]) { - free_page(pcpu->panic_stack); - free_pages(pcpu->async_stack, ASYNC_ORDER); + free_page(panic_stack); + free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); } return -ENOMEM; @@ -214,11 +239,11 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) #else vdso_free_per_cpu(pcpu->lowcore); #endif - if (pcpu != &pcpu_devices[0]) { - free_page(pcpu->panic_stack); - free_pages(pcpu->async_stack, ASYNC_ORDER); - free_pages((unsigned long) pcpu->lowcore, LC_ORDER); - } + if (pcpu == &pcpu_devices[0]) + return; + free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); + free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); + free_pages((unsigned long) pcpu->lowcore, LC_ORDER); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -299,6 +324,32 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), } /* + * Enable additional logical cpus for multi-threading. + */ +static int pcpu_set_smt(unsigned int mtid) +{ + register unsigned long reg1 asm ("1") = (unsigned long) mtid; + int cc; + + if (smp_cpu_mtid == mtid) + return 0; + asm volatile( + " sigp %1,0,%2 # sigp set multi-threading\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING) + : "cc"); + if (cc == 0) { + smp_cpu_mtid = mtid; + smp_cpu_mt_shift = 0; + while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) + smp_cpu_mt_shift++; + pcpu_devices[0].address = stap(); + } + return cc; +} + +/* * Call function on an online CPU. */ void smp_call_online_cpu(void (*func)(void *), void *data) @@ -319,7 +370,8 @@ void smp_call_online_cpu(void (*func)(void *), void *data) void smp_call_ipl_cpu(void (*func)(void *), void *data) { pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->panic_stack + PAGE_SIZE); + pcpu_devices->lowcore->panic_stack - + PANIC_FRAME_OFFSET + PAGE_SIZE); } int smp_find_processor_id(u16 address) @@ -512,22 +564,17 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); #ifdef CONFIG_CRASH_DUMP -static void __init smp_get_save_area(int cpu, u16 address) +static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu) { void *lc = pcpu_devices[0].lowcore; struct save_area_ext *sa_ext; unsigned long vx_sa; - if (is_kdump_kernel()) - return; - if (!OLDMEM_BASE && (address == boot_cpu_address || - ipl_info.type != IPL_TYPE_FCP_DUMP)) - return; sa_ext = dump_save_area_create(cpu); if (!sa_ext) panic("could not allocate memory for save area\n"); - if (address == boot_cpu_address) { - /* Copy the registers of the boot cpu. */ + if (is_boot_cpu) { + /* Copy the registers of the boot CPU. */ copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), SAVE_AREA_BASE - PAGE_SIZE, 0); if (MACHINE_HAS_VX) @@ -548,6 +595,64 @@ static void __init smp_get_save_area(int cpu, u16 address) free_page(vx_sa); } +/* + * Collect CPU state of the previous, crashed system. + * There are four cases: + * 1) standard zfcp dump + * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The boot CPU state is located in + * the absolute lowcore of the memory stored in the HSA. The zcore code + * will allocate the save area and copy the boot CPU state from the HSA. + * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory) + * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The firmware or the boot-loader + * stored the registers of the boot CPU in the absolute lowcore in the + * memory of the old system. + * 3) kdump and the old kernel did not store the CPU state, + * or stand-alone kdump for DASD + * condition: OLDMEM_BASE != NULL && !is_kdump_kernel() + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The kexec code or the boot-loader + * stored the registers of the boot CPU in the memory of the old system. + * 4) kdump and the old kernel stored the CPU state + * condition: OLDMEM_BASE != NULL && is_kdump_kernel() + * The state of all CPUs is stored in ELF sections in the memory of the + * old system. The ELF sections are picked up by the crash_dump code + * via elfcorehdr_addr. + */ +static void __init smp_store_cpu_states(struct sclp_cpu_info *info) +{ + unsigned int cpu, address, i, j; + int is_boot_cpu; + + if (is_kdump_kernel()) + /* Previous system stored the CPU states. Nothing to do. */ + return; + if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP)) + /* No previous system present, normal boot. */ + return; + /* Set multi-threading state to the previous system. */ + pcpu_set_smt(sclp_get_mtid_prev()); + /* Collect CPU states. */ + cpu = 0; + for (i = 0; i < info->configured; i++) { + /* Skip CPUs with different CPU type. */ + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + continue; + for (j = 0; j <= smp_cpu_mtid; j++, cpu++) { + address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j; + is_boot_cpu = (address == pcpu_devices[0].address); + if (is_boot_cpu && !OLDMEM_BASE) + /* Skip boot CPU for standard zfcp dump. */ + continue; + /* Get state for this CPu. */ + __smp_store_cpu_state(cpu, address, is_boot_cpu); + } + } +} + int smp_store_status(int cpu) { unsigned long vx_sa; @@ -565,10 +670,6 @@ int smp_store_status(int cpu) return 0; } -#else /* CONFIG_CRASH_DUMP */ - -static inline void smp_get_save_area(int cpu, u16 address) { } - #endif /* CONFIG_CRASH_DUMP */ void smp_cpu_set_polarization(int cpu, int val) @@ -590,11 +691,13 @@ static struct sclp_cpu_info *smp_get_cpu_info(void) info = kzalloc(sizeof(*info), GFP_KERNEL); if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { use_sigp_detection = 1; - for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + for (address = 0; address <= MAX_CPU_ADDRESS; + address += (1U << smp_cpu_mt_shift)) { if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == SIGP_CC_NOT_OPERATIONAL) continue; - info->cpu[info->configured].address = address; + info->cpu[info->configured].core_id = + address >> smp_cpu_mt_shift; info->configured++; } info->combined = info->configured; @@ -608,7 +711,8 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) { struct pcpu *pcpu; cpumask_t avail; - int cpu, nr, i; + int cpu, nr, i, j; + u16 address; nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); @@ -616,51 +720,76 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) continue; - if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = info->cpu[i].address; - pcpu->state = (i >= info->configured) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); + address = info->cpu[i].core_id << smp_cpu_mt_shift; + for (j = 0; j <= smp_cpu_mtid; j++) { + if (pcpu_find_address(cpu_present_mask, address + j)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = address + j; + pcpu->state = + (cpu >= info->configured*(smp_cpu_mtid + 1)) ? + CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); + if (cpu >= nr_cpu_ids) + break; + } } return nr; } static void __init smp_detect_cpus(void) { - unsigned int cpu, c_cpus, s_cpus; + unsigned int cpu, mtid, c_cpus, s_cpus; struct sclp_cpu_info *info; + u16 address; + /* Get CPU information */ info = smp_get_cpu_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); + + /* Find boot CPU type */ if (info->has_cpu_type) { - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address != boot_cpu_address) - continue; - /* The boot cpu dictates the cpu type. */ - boot_cpu_type = info->cpu[cpu].type; - break; - } + address = stap(); + for (cpu = 0; cpu < info->combined; cpu++) + if (info->cpu[cpu].core_id == address) { + /* The boot cpu dictates the cpu type. */ + boot_cpu_type = info->cpu[cpu].type; + break; + } + if (cpu >= info->combined) + panic("Could not find boot CPU type"); } + +#ifdef CONFIG_CRASH_DUMP + /* Collect CPU state of previous system */ + smp_store_cpu_states(info); +#endif + + /* Set multi-threading state for the current system */ + mtid = sclp_get_mtid(boot_cpu_type); + mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; + pcpu_set_smt(mtid); + + /* Print number of CPUs */ c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) continue; - if (cpu < info->configured) { - smp_get_save_area(c_cpus, info->cpu[cpu].address); - c_cpus++; - } else - s_cpus++; + if (cpu < info->configured) + c_cpus += smp_cpu_mtid + 1; + else + s_cpus += smp_cpu_mtid + 1; } pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); + + /* Add CPUs present at boot */ get_online_cpus(); __smp_rescan_cpus(info, 0); put_online_cpus(); @@ -696,12 +825,23 @@ static void smp_start_secondary(void *cpuvoid) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { struct pcpu *pcpu; - int rc; + int base, i, rc; pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != + base = cpu - (cpu % (smp_cpu_mtid + 1)); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (base + i < nr_cpu_ids) + if (cpu_online(base + i)) + break; + } + /* + * If this is the first CPU of the core to get online + * do an initial CPU reset. + */ + if (i > smp_cpu_mtid && + pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -774,7 +914,8 @@ void __init smp_fill_possible_mask(void) { unsigned int possible, sclp, cpu; - sclp = sclp_get_max_cpu() ?: nr_cpu_ids; + sclp = min(smp_max_threads, sclp_get_mtid_max() + 1); + sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; possible = min(possible, sclp); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) @@ -796,14 +937,9 @@ void __init smp_prepare_boot_cpu(void) { struct pcpu *pcpu = pcpu_devices; - boot_cpu_address = stap(); pcpu->state = CPU_STATE_CONFIGURED; - pcpu->address = boot_cpu_address; + pcpu->address = stap(); pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); - pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE - + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE - + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); set_cpu_present(0, true); @@ -848,7 +984,7 @@ static ssize_t cpu_configure_store(struct device *dev, const char *buf, size_t count) { struct pcpu *pcpu; - int cpu, val, rc; + int cpu, val, rc, i; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) @@ -860,29 +996,43 @@ static ssize_t cpu_configure_store(struct device *dev, rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ cpu = dev->id; - if (cpu_online(cpu) || cpu == 0) + cpu -= cpu % (smp_cpu_mtid + 1); + if (cpu == 0) goto out; + for (i = 0; i <= smp_cpu_mtid; i++) + if (cpu_online(cpu + i)) + goto out; pcpu = pcpu_devices + cpu; rc = 0; switch (val) { case 0: if (pcpu->state != CPU_STATE_CONFIGURED) break; - rc = sclp_cpu_deconfigure(pcpu->address); + rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_STANDBY; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; case 1: if (pcpu->state != CPU_STATE_STANDBY) break; - rc = sclp_cpu_configure(pcpu->address); + rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; default: @@ -929,8 +1079,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = pcpu_devices[cpu].cpu; - struct device *s = &c->dev; + struct device *s = &per_cpu(cpu_device, cpu)->dev; int err = 0; switch (action & ~CPU_TASKS_FROZEN) { @@ -953,7 +1102,7 @@ static int smp_add_present_cpu(int cpu) c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return -ENOMEM; - pcpu_devices[cpu].cpu = c; + per_cpu(cpu_device, cpu) = c; s = &c->dev; c->hotpluggable = 1; rc = register_cpu(c, cpu); |