diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries')
27 files changed, 399 insertions, 271 deletions
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 0ff5174ae4f..3dbef309bc8 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -7,7 +7,7 @@ EXTRA_CFLAGS += -DDEBUG endif obj-y := lpar.o hvCall.o nvram.o reconfig.o \ - setup.o iommu.o ras.o \ + setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_XICS) += xics.o diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index a277f2e28db..f4803868642 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/gfp.h> #include <linux/init.h> #include <linux/kthread.h> #include <linux/module.h> diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 37bce52526d..d71e5858408 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -16,6 +16,7 @@ #include <linux/proc_fs.h> #include <linux/spinlock.h> #include <linux/cpu.h> +#include <linux/slab.h> #include "offline_states.h" #include <asm/prom.h> @@ -78,13 +79,12 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa) * prepend this to the full_name. */ name = (char *)ccwa + ccwa->name_offset; - dn->full_name = kmalloc(strlen(name) + 2, GFP_KERNEL); + dn->full_name = kasprintf(GFP_KERNEL, "/%s", name); if (!dn->full_name) { kfree(dn); return NULL; } - sprintf(dn->full_name, "/%s", name); return dn; } @@ -409,15 +409,13 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) * directory of the device tree. CPUs actually live in the * cpus directory so we need to fixup the full_name. */ - cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus") + 1, - GFP_KERNEL); + cpu_name = kasprintf(GFP_KERNEL, "/cpus%s", dn->full_name); if (!cpu_name) { dlpar_free_cc_nodes(dn); rc = -ENOMEM; goto out; } - sprintf(cpu_name, "/cpus%s", dn->full_name); kfree(dn->full_name); dn->full_name = cpu_name; @@ -432,6 +430,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) if (rc) { dlpar_release_drc(drc_index); dlpar_free_cc_nodes(dn); + goto out; } rc = dlpar_online_cpu(dn); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index c5f3116b6ca..a00addb5594 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -21,6 +21,7 @@ */ #include <linux/init.h> +#include <linux/slab.h> #include <linux/debugfs.h> #include <asm/smp.h> #include <asm/system.h> diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index ccd8dd03b8c..34b7dc12e73 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -100,7 +100,7 @@ int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); /* Lock to avoid races due to multiple reports of an error */ -static DEFINE_SPINLOCK(confirm_error_lock); +static DEFINE_RAW_SPINLOCK(confirm_error_lock); /* Buffer for reporting slot-error-detail rtas calls. Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -436,7 +436,7 @@ static void __eeh_clear_slot(struct device_node *parent, int mode_flag) void eeh_clear_slot (struct device_node *dn, int mode_flag) { unsigned long flags; - spin_lock_irqsave(&confirm_error_lock, flags); + raw_spin_lock_irqsave(&confirm_error_lock, flags); dn = find_device_pe (dn); @@ -447,7 +447,7 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) PCI_DN(dn)->eeh_mode &= ~mode_flag; PCI_DN(dn)->eeh_check_count = 0; __eeh_clear_slot(dn, mode_flag); - spin_unlock_irqrestore(&confirm_error_lock, flags); + raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } /** @@ -491,7 +491,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) pdn->eeh_mode & EEH_MODE_NOCHECK) { ignored_check++; pr_debug("EEH: Ignored check (%x) for %s %s\n", - pdn->eeh_mode, pci_name (dev), dn->full_name); + pdn->eeh_mode, eeh_pci_name(dev), dn->full_name); return 0; } @@ -506,7 +506,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * in one slot might report errors simultaneously, and we * only want one error recovery routine running. */ - spin_lock_irqsave(&confirm_error_lock, flags); + raw_spin_lock_irqsave(&confirm_error_lock, flags); rc = 1; if (pdn->eeh_mode & EEH_MODE_ISOLATED) { pdn->eeh_check_count ++; @@ -515,7 +515,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) printk (KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", pdn->eeh_check_count, location, - dev->driver->name, pci_name(dev)); + dev->driver->name, eeh_pci_name(dev)); printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n", dev->driver->name); dump_stack(); @@ -575,7 +575,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * with other functions on this device, and functions under * bridges. */ eeh_mark_slot (dn, EEH_MODE_ISOLATED); - spin_unlock_irqrestore(&confirm_error_lock, flags); + raw_spin_unlock_irqrestore(&confirm_error_lock, flags); eeh_send_failure_event (dn, dev); @@ -586,7 +586,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) return 1; dn_unlock: - spin_unlock_irqrestore(&confirm_error_lock, flags); + raw_spin_unlock_irqrestore(&confirm_error_lock, flags); return rc; } @@ -749,7 +749,7 @@ static void __rtas_set_slot_reset(struct pci_dn *pdn) /* Determine type of EEH reset required by device, * default hot reset or fundamental reset */ - if (dev->needs_freset) + if (dev && dev->needs_freset) rtas_pci_slot_reset(pdn, 3); else rtas_pci_slot_reset(pdn, 1); @@ -1064,7 +1064,7 @@ void __init eeh_init(void) struct device_node *phb, *np; struct eeh_early_enable_info info; - spin_lock_init(&confirm_error_lock); + raw_spin_lock_init(&confirm_error_lock); spin_lock_init(&slot_errbuf_lock); np = of_find_node_by_path("/rtas"); diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index ce37040af87..30b987b73c2 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/pci.h> #include <linux/rbtree.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <asm/atomic.h> #include <asm/pci-bridge.h> diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index ef8e4544848..b8d70f5d9aa 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -337,7 +337,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) location = location ? location : "unknown"; printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " "for location=%s pci addr=%s\n", - location, pci_name(event->dev)); + location, eeh_pci_name(event->dev)); return NULL; } @@ -368,7 +368,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event) pci_str = pci_name (frozen_pdn->pcidev); drv_str = pcid_name (frozen_pdn->pcidev); } else { - pci_str = pci_name (event->dev); + pci_str = eeh_pci_name(event->dev); drv_str = pcid_name (event->dev); } @@ -478,9 +478,9 @@ excess_failures: * due to actual, failed cards. */ printk(KERN_ERR - "EEH: PCI device at location=%s driver=%s pci addr=%s \n" + "EEH: PCI device at location=%s driver=%s pci addr=%s\n" "has failed %d times in the last hour " - "and has been permanently disabled. \n" + "and has been permanently disabled.\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); goto perm_error; @@ -488,7 +488,7 @@ excess_failures: hard_fail: printk(KERN_ERR "EEH: Unable to recover from failure of PCI device " - "at location=%s driver=%s pci addr=%s \n" + "at location=%s driver=%s pci addr=%s\n" "Please try reseating this device or replacing it.\n", location, drv_str, pci_str); diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index ddb80f5d850..2ec500c130b 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -22,6 +22,7 @@ #include <linux/list.h> #include <linux/mutex.h> #include <linux/pci.h> +#include <linux/slab.h> #include <linux/workqueue.h> #include <asm/eeh_event.h> #include <asm/ppc-pci.h> @@ -80,7 +81,7 @@ static int eeh_event_handler(void * dummy) eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", - pci_name(event->dev)); + eeh_pci_name(event->dev)); pdn = handle_eeh_events(event); diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c new file mode 100644 index 00000000000..e889c9d9586 --- /dev/null +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2001 Dave Engebretsen IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <asm/prom.h> + +#include "pseries.h" + +void request_event_sources_irqs(struct device_node *np, + irq_handler_t handler, + const char *name) +{ + int i, index, count = 0; + struct of_irq oirq; + const u32 *opicprop; + unsigned int opicplen; + unsigned int virqs[16]; + + /* Check for obsolete "open-pic-interrupt" property. If present, then + * map those interrupts using the default interrupt host and default + * trigger + */ + opicprop = of_get_property(np, "open-pic-interrupt", &opicplen); + if (opicprop) { + opicplen /= sizeof(u32); + for (i = 0; i < opicplen; i++) { + if (count > 15) + break; + virqs[count] = irq_create_mapping(NULL, *(opicprop++)); + if (virqs[count] == NO_IRQ) + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + else + count++; + + } + } + /* Else use normal interrupt tree parsing */ + else { + /* First try to do a proper OF tree parsing */ + for (index = 0; of_irq_map_one(np, index, &oirq) == 0; + index++) { + if (count > 15) + break; + virqs[count] = irq_create_of_mapping(oirq.controller, + oirq.specifier, + oirq.size); + if (virqs[count] == NO_IRQ) + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", np->full_name); + else + count++; + } + } + + /* Now request them */ + for (i = 0; i < count; i++) { + if (request_irq(virqs[i], handler, 0, name, NULL)) { + printk(KERN_ERR "Unable to request interrupt %d for " + "%s\n", virqs[i], np->full_name); + return; + } + } +} + diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 6ea4698d917..8f85f399ab9 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -122,74 +122,38 @@ static void pseries_mach_cpu_die(void) if (!get_lppaca()->shared_proc) get_lppaca()->donate_dedicated_cpu = 1; - printk(KERN_INFO - "cpu %u (hwid %u) ceding for offline with hint %d\n", - cpu, hwcpu, cede_latency_hint); while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { extended_cede_processor(cede_latency_hint); - printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n", - cpu, hwcpu); - printk(KERN_INFO - "Decrementer value = %x Timebase value = %llx\n", - get_dec(), get_tb()); } - printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n", - cpu, hwcpu); - if (!get_lppaca()->shared_proc) get_lppaca()->donate_dedicated_cpu = 0; get_lppaca()->idle = 0; - } - if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { - unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); + if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { + unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); - /* - * NOTE: Calling start_secondary() here for now to - * start new context. - * However, need to do it cleanly by resetting the - * stack pointer. - */ - start_secondary(); + /* + * Call to start_secondary_resume() will not return. + * Kernel stack will be reset and start_secondary() + * will be called to continue the online operation. + */ + start_secondary_resume(); + } + } - } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { + /* Requested state is CPU_STATE_OFFLINE at this point */ + WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); - set_cpu_current_state(cpu, CPU_STATE_OFFLINE); - unregister_slb_shadow(hard_smp_processor_id(), - __pa(get_slb_shadow())); - rtas_stop_self(); - } + set_cpu_current_state(cpu, CPU_STATE_OFFLINE); + unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); + rtas_stop_self(); /* Should never get here... */ BUG(); for(;;); } -static int qcss_tok; /* query-cpu-stopped-state token */ - -/* Get state of physical CPU. - * Return codes: - * 0 - The processor is in the RTAS stopped state - * 1 - stop-self is in progress - * 2 - The processor is not in the RTAS stopped state - * -1 - Hardware Error - * -2 - Hardware Busy, Try again later. - */ -static int query_cpu_stopped(unsigned int pcpu) -{ - int cpu_status, status; - - status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); - if (status != 0) { - printk(KERN_ERR - "RTAS query-cpu-stopped-state failed: %i\n", status); - return status; - } - - return cpu_status; -} - static int pseries_cpu_disable(void) { int cpu = smp_processor_id(); @@ -199,7 +163,7 @@ static int pseries_cpu_disable(void) /*fix boot_cpuid here*/ if (cpu == boot_cpuid) - boot_cpuid = any_online_cpu(cpu_online_map); + boot_cpuid = cpumask_any(cpu_online_mask); /* FIXME: abstract this to not be platform specific later on */ xics_migrate_irqs_away(); @@ -236,8 +200,9 @@ static void pseries_cpu_die(unsigned int cpu) } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { for (tries = 0; tries < 25; tries++) { - cpu_status = query_cpu_stopped(pcpu); - if (cpu_status == 0 || cpu_status == -1) + cpu_status = smp_query_cpu_stopped(pcpu); + if (cpu_status == QCSS_STOPPED || + cpu_status == QCSS_HARDWARE_ERROR) break; cpu_relax(); } @@ -257,7 +222,7 @@ static void pseries_cpu_die(unsigned int cpu) } /* - * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle + * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle * here is that a cpu device node may represent up to two logical cpus * in the SMT case. We must honor the assumption in other code that * the logical ids for sibling SMT threads x and y are adjacent, such @@ -266,7 +231,7 @@ static void pseries_cpu_die(unsigned int cpu) static int pseries_add_processor(struct device_node *np) { unsigned int cpu; - cpumask_t candidate_map, tmp = CPU_MASK_NONE; + cpumask_var_t candidate_mask, tmp; int err = -ENOSPC, len, nthreads, i; const u32 *intserv; @@ -274,48 +239,53 @@ static int pseries_add_processor(struct device_node *np) if (!intserv) return 0; + zalloc_cpumask_var(&candidate_mask, GFP_KERNEL); + zalloc_cpumask_var(&tmp, GFP_KERNEL); + nthreads = len / sizeof(u32); for (i = 0; i < nthreads; i++) - cpu_set(i, tmp); + cpumask_set_cpu(i, tmp); cpu_maps_update_begin(); - BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); + BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); /* Get a bitmap of unoccupied slots. */ - cpus_xor(candidate_map, cpu_possible_map, cpu_present_map); - if (cpus_empty(candidate_map)) { + cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); + if (cpumask_empty(candidate_mask)) { /* If we get here, it most likely means that NR_CPUS is * less than the partition's max processors setting. */ printk(KERN_ERR "Cannot add cpu %s; this system configuration" " supports %d logical cpus.\n", np->full_name, - cpus_weight(cpu_possible_map)); + cpumask_weight(cpu_possible_mask)); goto out_unlock; } - while (!cpus_empty(tmp)) - if (cpus_subset(tmp, candidate_map)) + while (!cpumask_empty(tmp)) + if (cpumask_subset(tmp, candidate_mask)) /* Found a range where we can insert the new cpu(s) */ break; else - cpus_shift_left(tmp, tmp, nthreads); + cpumask_shift_left(tmp, tmp, nthreads); - if (cpus_empty(tmp)) { - printk(KERN_ERR "Unable to find space in cpu_present_map for" + if (cpumask_empty(tmp)) { + printk(KERN_ERR "Unable to find space in cpu_present_mask for" " processor %s with %d thread(s)\n", np->name, nthreads); goto out_unlock; } - for_each_cpu_mask(cpu, tmp) { - BUG_ON(cpu_isset(cpu, cpu_present_map)); + for_each_cpu(cpu, tmp) { + BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask)); set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, *intserv++); } err = 0; out_unlock: cpu_maps_update_done(); + free_cpumask_var(candidate_mask); + free_cpumask_var(tmp); return err; } @@ -346,7 +316,7 @@ static void pseries_remove_processor(struct device_node *np) set_hard_smp_processor_id(cpu, -1); break; } - if (cpu == NR_CPUS) + if (cpu >= nr_cpu_ids) printk(KERN_WARNING "Could not find cpu to remove " "with physical id 0x%x\n", intserv[i]); } @@ -387,24 +357,12 @@ static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; static int parse_cede_parameters(void) { - int call_status; - memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - CEDE_LATENCY_TOKEN, - __pa(cede_parameters), - CEDE_LATENCY_PARAM_MAX_LENGTH); - - if (call_status != 0) - printk(KERN_INFO "CEDE_LATENCY: \ - %s %s Error calling get-system-parameter(0x%x)\n", - __FILE__, __func__, call_status); - else - printk(KERN_INFO "CEDE_LATENCY: \ - get-system-parameter successful.\n"); - - return call_status; + return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + CEDE_LATENCY_TOKEN, + __pa(cede_parameters), + CEDE_LATENCY_PARAM_MAX_LENGTH); } static int __init pseries_cpu_hotplug_init(void) @@ -412,6 +370,7 @@ static int __init pseries_cpu_hotplug_init(void) struct device_node *np; const char *typep; int cpu; + int qcss_tok; for_each_node_by_name(np, "interrupt-controller") { typep = of_get_property(np, "compatible", NULL); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9b21ee68ea5..bc880366414 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -10,13 +10,14 @@ */ #include <linux/of.h> -#include <linux/lmb.h> +#include <linux/memblock.h> +#include <linux/vmalloc.h> #include <asm/firmware.h> #include <asm/machdep.h> #include <asm/pSeries_reconfig.h> #include <asm/sparsemem.h> -static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size) +static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long start, start_pfn; struct zone *zone; @@ -25,7 +26,7 @@ static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size) start_pfn = base >> PAGE_SHIFT; if (!pfn_valid(start_pfn)) { - lmb_remove(base, lmb_size); + memblock_remove(base, memblock_size); return 0; } @@ -40,20 +41,26 @@ static int pseries_remove_lmb(unsigned long base, unsigned int lmb_size) * to sysfs "state" file and we can't remove sysfs entries * while writing to it. So we have to defer it to here. */ - ret = __remove_pages(zone, start_pfn, lmb_size >> PAGE_SHIFT); + ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT); if (ret) return ret; /* * Update memory regions for memory remove */ - lmb_remove(base, lmb_size); + memblock_remove(base, memblock_size); /* * Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(base); - ret = remove_section_mapping(start, start + lmb_size); + ret = remove_section_mapping(start, start + memblock_size); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory + */ + vm_unmap_aliases(); + return ret; } @@ -73,7 +80,7 @@ static int pseries_remove_memory(struct device_node *np) return 0; /* - * Find the bae address and size of the lmb + * Find the bae address and size of the memblock */ regs = of_get_property(np, "reg", NULL); if (!regs) @@ -82,7 +89,7 @@ static int pseries_remove_memory(struct device_node *np) base = *(unsigned long *)regs; lmb_size = regs[3]; - ret = pseries_remove_lmb(base, lmb_size); + ret = pseries_remove_memblock(base, lmb_size); return ret; } @@ -102,7 +109,7 @@ static int pseries_add_memory(struct device_node *np) return 0; /* - * Find the base and size of the lmb + * Find the base and size of the memblock */ regs = of_get_property(np, "reg", NULL); if (!regs) @@ -114,7 +121,7 @@ static int pseries_add_memory(struct device_node *np) /* * Update memory region to represent the memory add */ - ret = lmb_add(base, lmb_size); + ret = memblock_add(base, lmb_size); return (ret < 0) ? -EINVAL : 0; } @@ -135,10 +142,10 @@ static int pseries_drconf_memory(unsigned long *base, unsigned int action) } if (action == PSERIES_DRCONF_MEM_ADD) { - rc = lmb_add(*base, *lmb_size); + rc = memblock_add(*base, *lmb_size); rc = (rc < 0) ? -EINVAL : 0; } else if (action == PSERIES_DRCONF_MEM_REMOVE) { - rc = pseries_remove_lmb(*base, *lmb_size); + rc = pseries_remove_memblock(*base, *lmb_size); } else { rc = -EINVAL; } diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 383a5d0e981..48d20573e4d 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -228,3 +228,41 @@ _GLOBAL(plpar_hcall9) mtcrf 0xff,r0 blr /* return r3 = status */ + +/* See plpar_hcall_raw to see why this is needed */ +_GLOBAL(plpar_hcall9_raw) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + std r4,STK_PARM(r4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STK_PARM(r11)(r1) /* put arg7 in R10 */ + ld r11,STK_PARM(r12)(r1) /* put arg8 in R11 */ + ld r12,STK_PARM(r13)(r1) /* put arg9 in R12 */ + + HVSC /* invoke the hypervisor */ + + mr r0,r12 + ld r12,STK_PARM(r4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + std r9, 40(r12) + std r10,48(r12) + std r11,56(r12) + std r0, 64(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 2f58c71b725..e19ff021e71 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -102,7 +102,7 @@ static const struct file_operations hcall_inst_seq_fops = { #define CPU_NAME_BUF_SIZE 32 -static void probe_hcall_entry(unsigned long opcode, unsigned long *args) +static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long *args) { struct hcall_stats *h; @@ -114,7 +114,7 @@ static void probe_hcall_entry(unsigned long opcode, unsigned long *args) h->purr_start = mfspr(SPRN_PURR); } -static void probe_hcall_exit(unsigned long opcode, unsigned long retval, +static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long retval, unsigned long *retbuf) { struct hcall_stats *h; @@ -124,8 +124,8 @@ static void probe_hcall_exit(unsigned long opcode, unsigned long retval, h = &__get_cpu_var(hcall_stats)[opcode / 4]; h->num_calls++; - h->tb_total = mftb() - h->tb_start; - h->purr_total = mfspr(SPRN_PURR) - h->purr_start; + h->tb_total += mftb() - h->tb_start; + h->purr_total += mfspr(SPRN_PURR) - h->purr_start; put_cpu_var(hcall_stats); } @@ -140,11 +140,11 @@ static int __init hcall_inst_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; - if (register_trace_hcall_entry(probe_hcall_entry)) + if (register_trace_hcall_entry(probe_hcall_entry, NULL)) return -EINVAL; - if (register_trace_hcall_exit(probe_hcall_exit)) { - unregister_trace_hcall_entry(probe_hcall_entry); + if (register_trace_hcall_exit(probe_hcall_exit, NULL)) { + unregister_trace_hcall_entry(probe_hcall_entry, NULL); return -EINVAL; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1a0000a4b6d..395848e30c5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -66,7 +66,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, tcep = ((u64 *)tbl->it_base) + index; while (npages--) { - /* can't move this out since we might cross LMB boundary */ + /* can't move this out since we might cross MEMBLOCK boundary */ rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; @@ -468,7 +468,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); - dn = dev->dev.archdata.of_node; + dn = dev->dev.of_node; /* If we're the direct child of a root bus, then we need to allocate * an iommu table ourselves. The bus setup code should have setup diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0707653612b..cf79b46d8f8 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -367,21 +367,28 @@ static void pSeries_lpar_hptab_clear(void) { unsigned long size_bytes = 1UL << ppc64_pft_size; unsigned long hpte_count = size_bytes >> 4; - unsigned long dummy1, dummy2, dword0; + struct { + unsigned long pteh; + unsigned long ptel; + } ptes[4]; long lpar_rc; - int i; + int i, j; - /* TODO: Use bulk call */ - for (i = 0; i < hpte_count; i++) { - /* dont remove HPTEs with VRMA mappings */ - lpar_rc = plpar_pte_remove_raw(H_ANDCOND, i, HPTE_V_1TB_SEG, - &dummy1, &dummy2); - if (lpar_rc == H_NOT_FOUND) { - lpar_rc = plpar_pte_read_raw(0, i, &dword0, &dummy1); - if (!lpar_rc && ((dword0 & HPTE_V_VRMA_MASK) - != HPTE_V_VRMA_MASK)) - /* Can be hpte for 1TB Seg. So remove it */ - plpar_pte_remove_raw(0, i, 0, &dummy1, &dummy2); + /* Read in batches of 4, + * invalidate only valid entries not in the VRMA + * hpte_count will be a multiple of 4 + */ + for (i = 0; i < hpte_count; i += 4) { + lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); + if (lpar_rc != H_SUCCESS) + continue; + for (j = 0; j < 4; j++){ + if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == + HPTE_V_VRMA_MASK) + continue; + if (ptes[j].pteh & HPTE_V_VALID) + plpar_pte_remove_raw(0, i + j, 0, + &(ptes[j].pteh), &(ptes[j].ptel)); } } } diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 42f7e384e6c..bc3c7f2abd7 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -15,7 +15,6 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/spinlock.h> #include <asm/uaccess.h> #include <asm/nvram.h> diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h index 22574e0d9d9..75a6f480d93 100644 --- a/arch/powerpc/platforms/pseries/offline_states.h +++ b/arch/powerpc/platforms/pseries/offline_states.h @@ -9,10 +9,31 @@ enum cpu_state_vals { CPU_MAX_OFFLINE_STATES }; +#ifdef CONFIG_HOTPLUG_CPU extern enum cpu_state_vals get_cpu_current_state(int cpu); extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); -extern enum cpu_state_vals get_preferred_offline_state(int cpu); extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); extern void set_default_offline_state(int cpu); +#else +static inline enum cpu_state_vals get_cpu_current_state(int cpu) +{ + return CPU_STATE_ONLINE; +} + +static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state) +{ +} + +static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state) +{ +} + +static inline void set_default_offline_state(int cpu) +{ +} +#endif + +extern enum cpu_state_vals get_preferred_offline_state(int cpu); extern int start_secondary(void); +extern void start_secondary_resume(void); #endif diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index b6fa3e4b51b..4b7a062dee1 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -165,7 +165,7 @@ int remove_phb_dynamic(struct pci_controller *phb) struct resource *res; int rc, i; - pr_debug("PCI: Removing PHB %04x:%02x... \n", + pr_debug("PCI: Removing PHB %04x:%02x...\n", pci_domain_nr(b), b->number); /* We cannot to remove a root bus that has children */ diff --git a/arch/powerpc/platforms/pseries/phyp_dump.c b/arch/powerpc/platforms/pseries/phyp_dump.c index 15eb6107bcd..6e7742da007 100644 --- a/arch/powerpc/platforms/pseries/phyp_dump.c +++ b/arch/powerpc/platforms/pseries/phyp_dump.c @@ -11,6 +11,7 @@ * */ +#include <linux/gfp.h> #include <linux/init.h> #include <linux/kobject.h> #include <linux/mm.h> @@ -150,7 +151,7 @@ static void print_dump_header(const struct phyp_dump_header *ph) printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto); /*set cpu state and hpte states as well scratch pad area */ - printk(KERN_INFO " CPU AREA \n"); + printk(KERN_INFO " CPU AREA\n"); printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags); printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type); printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags); @@ -161,7 +162,7 @@ static void print_dump_header(const struct phyp_dump_header *ph) printk(KERN_INFO "cpu length_copied =%llx\n", ph->cpu_data.length_copied); - printk(KERN_INFO " HPTE AREA \n"); + printk(KERN_INFO " HPTE AREA\n"); printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags); printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type); printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags); @@ -172,7 +173,7 @@ static void print_dump_header(const struct phyp_dump_header *ph) printk(KERN_INFO "HPTE length_copied =%llx\n", ph->hpte_data.length_copied); - printk(KERN_INFO " SRSD AREA \n"); + printk(KERN_INFO " SRSD AREA\n"); printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags); printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type); printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags); @@ -254,12 +255,12 @@ void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr) /* ------------------------------------------------- */ /** - * release_memory_range -- release memory previously lmb_reserved + * release_memory_range -- release memory previously memblock_reserved * @start_pfn: starting physical frame number * @nr_pages: number of pages to free. * * This routine will release memory that had been previously - * lmb_reserved in early boot. The released memory becomes + * memblock_reserved in early boot. The released memory becomes * available for genreal use. */ static void release_memory_range(unsigned long start_pfn, diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index 0603c91538a..d9801117124 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -4,6 +4,14 @@ #include <asm/hvcall.h> #include <asm/page.h> +/* Get state of physical CPU from query_cpu_stopped */ +int smp_query_cpu_stopped(unsigned int pcpu); +#define QCSS_STOPPED 0 +#define QCSS_STOPPING 1 +#define QCSS_NOT_STOPPED 2 +#define QCSS_HARDWARE_ERROR -1 +#define QCSS_HARDWARE_BUSY -2 + static inline long poll_pending(void) { return plpar_hcall_norets(H_POLL_PENDING); @@ -183,6 +191,24 @@ static inline long plpar_pte_read_raw(unsigned long flags, unsigned long ptex, return rc; } +/* + * plpar_pte_read_4_raw can be called in real mode. + * ptes must be 8*sizeof(unsigned long) + */ +static inline long plpar_pte_read_4_raw(unsigned long flags, unsigned long ptex, + unsigned long *ptes) + +{ + long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9_raw(H_READ, retbuf, flags | H_READ_4, ptex); + + memcpy(ptes, retbuf, 8*sizeof(unsigned long)); + + return rc; +} + static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex, unsigned long avpn) { @@ -259,12 +285,12 @@ static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr) return plpar_hcall_norets(H_IPI, servernum, mfrr); } -static inline long plpar_xirr(unsigned long *xirr_ret) +static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr) { long rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - rc = plpar_hcall(H_XIRR, retbuf); + rc = plpar_hcall(H_XIRR, retbuf, cppr); *xirr_ret = retbuf[0]; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 9e17c0d2a0c..40c93cad91d 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -10,6 +10,13 @@ #ifndef _PSERIES_PSERIES_H #define _PSERIES_PSERIES_H +#include <linux/interrupt.h> + +struct device_node; + +extern void request_event_sources_irqs(struct device_node *np, + irq_handler_t handler, const char *name); + extern void __init fw_feature_init(const char *hypertas, unsigned long len); struct pt_regs; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index d20b96e22c2..41a3e9a039e 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -30,7 +30,6 @@ #include <linux/interrupt.h> #include <linux/timex.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/delay.h> #include <linux/irq.h> #include <linux/random.h> @@ -68,63 +67,6 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); static irqreturn_t ras_error_interrupt(int irq, void *dev_id); -static void request_ras_irqs(struct device_node *np, - irq_handler_t handler, - const char *name) -{ - int i, index, count = 0; - struct of_irq oirq; - const u32 *opicprop; - unsigned int opicplen; - unsigned int virqs[16]; - - /* Check for obsolete "open-pic-interrupt" property. If present, then - * map those interrupts using the default interrupt host and default - * trigger - */ - opicprop = of_get_property(np, "open-pic-interrupt", &opicplen); - if (opicprop) { - opicplen /= sizeof(u32); - for (i = 0; i < opicplen; i++) { - if (count > 15) - break; - virqs[count] = irq_create_mapping(NULL, *(opicprop++)); - if (virqs[count] == NO_IRQ) - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", np->full_name); - else - count++; - - } - } - /* Else use normal interrupt tree parsing */ - else { - /* First try to do a proper OF tree parsing */ - for (index = 0; of_irq_map_one(np, index, &oirq) == 0; - index++) { - if (count > 15) - break; - virqs[count] = irq_create_of_mapping(oirq.controller, - oirq.specifier, - oirq.size); - if (virqs[count] == NO_IRQ) - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", np->full_name); - else - count++; - } - } - - /* Now request them */ - for (i = 0; i < count; i++) { - if (request_irq(virqs[i], handler, 0, name, NULL)) { - printk(KERN_ERR "Unable to request interrupt %d for " - "%s\n", virqs[i], np->full_name); - return; - } - } -} - /* * Initialize handlers for the set of interrupts caused by hardware errors * and power system events. @@ -139,14 +81,15 @@ static int __init init_ras_IRQ(void) /* Internal Errors */ np = of_find_node_by_path("/event-sources/internal-errors"); if (np != NULL) { - request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR"); + request_event_sources_irqs(np, ras_error_interrupt, + "RAS_ERROR"); of_node_put(np); } /* EPOW Events */ np = of_find_node_by_path("/event-sources/epow-events"); if (np != NULL) { - request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW"); + request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); of_node_put(np); } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index a2305d29bbb..1a58637bcea 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -15,6 +15,7 @@ #include <linux/kref.h> #include <linux/notifier.h> #include <linux/proc_fs.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 1b45c458f95..80e9e7652a4 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -26,6 +26,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/delay.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/rtas.h> #include <asm/prom.h> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ca5f2e10972..a6d19e3a505 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -23,7 +23,6 @@ #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/tty.h> #include <linux/major.h> @@ -497,13 +496,14 @@ static int __init pSeries_probe(void) } -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); +DECLARE_PER_CPU(long, smt_snooze_delay); static void pseries_dedicated_idle_sleep(void) { unsigned int cpu = smp_processor_id(); unsigned long start_snooze; unsigned long in_purr, out_purr; + long snooze = __get_cpu_var(smt_snooze_delay); /* * Indicate to the HV that we are idle. Now would be @@ -518,13 +518,12 @@ static void pseries_dedicated_idle_sleep(void) * has been checked recently. If we should poll for a little * while, do so. */ - if (__get_cpu_var(smt_snooze_delay)) { - start_snooze = get_tb() + - __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec; + if (snooze) { + start_snooze = get_tb() + snooze * tb_ticks_per_usec; local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); - while (get_tb() < start_snooze) { + while ((snooze < 0) || (get_tb() < start_snooze)) { if (need_resched() || cpu_is_offline(cpu)) goto out; ppc64_runlatch_off(); diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index b4886635972..3b1bf61c45b 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -55,7 +55,29 @@ * The Primary thread of each non-boot processor was started from the OF client * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop. */ -static cpumask_t of_spin_map; +static cpumask_var_t of_spin_mask; + +/* Query where a cpu is now. Return codes #defined in plpar_wrappers.h */ +int smp_query_cpu_stopped(unsigned int pcpu) +{ + int cpu_status, status; + int qcss_tok = rtas_token("query-cpu-stopped-state"); + + if (qcss_tok == RTAS_UNKNOWN_SERVICE) { + printk(KERN_INFO "Firmware doesn't support " + "query-cpu-stopped-state\n"); + return QCSS_HARDWARE_ERROR; + } + + status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); + if (status != 0) { + printk(KERN_ERR + "RTAS query-cpu-stopped-state failed: %i\n", status); + return status; + } + + return cpu_status; +} /** * smp_startup_cpu() - start the given cpu @@ -76,12 +98,18 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) unsigned int pcpu; int start_cpu; - if (cpu_isset(lcpu, of_spin_map)) + if (cpumask_test_cpu(lcpu, of_spin_mask)) /* Already started by OF and sitting in spin loop */ return 1; pcpu = get_hard_smp_processor_id(lcpu); + /* Check to see if the CPU out of FW already for kexec */ + if (smp_query_cpu_stopped(pcpu) == QCSS_NOT_STOPPED){ + cpumask_set_cpu(lcpu, of_spin_mask); + return 1; + } + /* Fixup atomic count: it exited inside IRQ handler. */ task_thread_info(paca[lcpu].__current)->preempt_count = 0; @@ -115,7 +143,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) if (firmware_has_feature(FW_FEATURE_SPLPAR)) vpa_init(cpu); - cpu_clear(cpu, of_spin_map); + cpumask_clear_cpu(cpu, of_spin_mask); set_cpu_current_state(cpu, CPU_STATE_ONLINE); set_default_offline_state(cpu); @@ -144,8 +172,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr) hcpuid = get_hard_smp_processor_id(nr); rc = plpar_hcall_norets(H_PROD, hcpuid); if (rc != H_SUCCESS) - printk(KERN_ERR "Error: Prod to wake up processor %d\ - Ret= %ld\n", nr, rc); + printk(KERN_ERR "Error: Prod to wake up processor %d " + "Ret= %ld\n", nr, rc); } } @@ -186,17 +214,19 @@ static void __init smp_init_pseries(void) pr_debug(" -> smp_init_pSeries()\n"); + alloc_bootmem_cpumask_var(&of_spin_mask); + /* Mark threads which are still spinning in hold loops. */ if (cpu_has_feature(CPU_FTR_SMT)) { for_each_present_cpu(i) { if (cpu_thread_in_core(i) == 0) - cpu_set(i, of_spin_map); + cpumask_set_cpu(i, of_spin_mask); } } else { - of_spin_map = cpu_present_map; + cpumask_copy(of_spin_mask, cpu_present_mask); } - cpu_clear(boot_cpuid, of_spin_map); + cpumask_clear_cpu(boot_cpuid, of_spin_mask); /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index d80f193cd87..f19d1946839 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -120,14 +120,14 @@ static inline void direct_qirr_info(int n_cpu, u8 value) /* LPAR low level accessors */ -static inline unsigned int lpar_xirr_info_get(void) +static inline unsigned int lpar_xirr_info_get(unsigned char cppr) { unsigned long lpar_rc; unsigned long return_value; - lpar_rc = plpar_xirr(&return_value); + lpar_rc = plpar_xirr(&return_value, cppr); if (lpar_rc != H_SUCCESS) - panic(" bad return code xirr - rc = %lx \n", lpar_rc); + panic(" bad return code xirr - rc = %lx\n", lpar_rc); return (unsigned int)return_value; } @@ -163,39 +163,43 @@ static inline void lpar_qirr_info(int n_cpu , u8 value) /* Interface to generic irq subsystem */ #ifdef CONFIG_SMP -static int get_irq_server(unsigned int virq, cpumask_t cpumask, +/* + * For the moment we only implement delivery to all cpus or one cpu. + * + * If the requested affinity is cpu_all_mask, we set global affinity. + * If not we set it to the first cpu in the mask, even if multiple cpus + * are set. This is so things like irqbalance (which set core and package + * wide affinities) do the right thing. + */ +static int get_irq_server(unsigned int virq, const struct cpumask *cpumask, unsigned int strict_check) { - int server; - /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t tmp = CPU_MASK_NONE; if (!distribute_irqs) return default_server; - if (!cpus_equal(cpumask, CPU_MASK_ALL)) { - cpus_and(tmp, cpu_online_map, cpumask); - - server = first_cpu(tmp); + if (!cpumask_equal(cpumask, cpu_all_mask)) { + int server = cpumask_first_and(cpu_online_mask, cpumask); - if (server < NR_CPUS) + if (server < nr_cpu_ids) return get_hard_smp_processor_id(server); if (strict_check) return -1; } - if (cpus_equal(cpu_online_map, cpu_present_map)) + /* + * Workaround issue with some versions of JS20 firmware that + * deliver interrupts to cpus which haven't been started. This + * happens when using the maxcpus= boot option. + */ + if (cpumask_equal(cpu_online_mask, cpu_present_mask)) return default_distrib_server; return default_server; } #else -static int get_irq_server(unsigned int virq, cpumask_t cpumask, - unsigned int strict_check) -{ - return default_server; -} +#define get_irq_server(virq, cpumask, strict_check) (default_server) #endif static void xics_unmask_irq(unsigned int virq) @@ -211,7 +215,7 @@ static void xics_unmask_irq(unsigned int virq) if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) return; - server = get_irq_server(virq, *(irq_to_desc(virq)->affinity), 0); + server = get_irq_server(virq, irq_to_desc(virq)->affinity, 0); call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, DEFAULT_PRIORITY); @@ -335,7 +339,8 @@ static unsigned int xics_get_irq_direct(void) static unsigned int xics_get_irq_lpar(void) { - unsigned int xirr = lpar_xirr_info_get(); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]); unsigned int vec = xics_xirr_vector(xirr); unsigned int irq; @@ -401,11 +406,7 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) return -1; } - /* - * For the moment only implement delivery to all cpus or one cpu. - * Get current irq_server for the given irq - */ - irq_server = get_irq_server(virq, *cpumask, 1); + irq_server = get_irq_server(virq, cpumask, 1); if (irq_server == -1) { char cpulist[128]; cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); @@ -428,7 +429,7 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) } static struct irq_chip xics_pic_direct = { - .name = " XICS ", + .name = "XICS", .startup = xics_startup, .mask = xics_mask_irq, .unmask = xics_unmask_irq, @@ -437,7 +438,7 @@ static struct irq_chip xics_pic_direct = { }; static struct irq_chip xics_pic_lpar = { - .name = " XICS ", + .name = "XICS", .startup = xics_startup, .mask = xics_mask_irq, .unmask = xics_unmask_irq, @@ -514,15 +515,13 @@ static void __init xics_init_host(void) /* * XICS only has a single IPI, so encode the messages per CPU */ -struct xics_ipi_struct { - unsigned long value; - } ____cacheline_aligned; - -static struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message); static inline void smp_xics_do_message(int cpu, int msg) { - set_bit(msg, &xics_ipi_message[cpu].value); + unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); + + set_bit(msg, tgt); mb(); if (firmware_has_feature(FW_FEATURE_LPAR)) lpar_qirr_info(cpu, IPI_PRIORITY); @@ -548,25 +547,23 @@ void smp_xics_message_pass(int target, int msg) static irqreturn_t xics_ipi_dispatch(int cpu) { + unsigned long *tgt = &per_cpu(xics_ipi_message, cpu); + WARN_ON(cpu_is_offline(cpu)); mb(); /* order mmio clearing qirr */ - while (xics_ipi_message[cpu].value) { - if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, - &xics_ipi_message[cpu].value)) { + while (*tgt) { + if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) { smp_message_recv(PPC_MSG_CALL_FUNCTION); } - if (test_and_clear_bit(PPC_MSG_RESCHEDULE, - &xics_ipi_message[cpu].value)) { + if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) { smp_message_recv(PPC_MSG_RESCHEDULE); } - if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, - &xics_ipi_message[cpu].value)) { + if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) { smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE); } #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) - if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, - &xics_ipi_message[cpu].value)) { + if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) { smp_message_recv(PPC_MSG_DEBUGGER_BREAK); } #endif @@ -618,7 +615,7 @@ int __init smp_xics_probe(void) { xics_request_ipi(); - return cpus_weight(cpu_possible_map); + return cpumask_weight(cpu_possible_mask); } #endif /* CONFIG_SMP */ @@ -788,9 +785,13 @@ static void xics_set_cpu_priority(unsigned char cppr) { struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); - BUG_ON(os_cppr->index != 0); + /* + * we only really want to set the priority when there's + * just one cppr value on the stack + */ + WARN_ON(os_cppr->index != 0); - os_cppr->stack[os_cppr->index] = cppr; + os_cppr->stack[0] = cppr; if (firmware_has_feature(FW_FEATURE_LPAR)) lpar_cppr_info(cppr); @@ -825,8 +826,14 @@ void xics_setup_cpu(void) void xics_teardown_cpu(void) { + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); int cpu = smp_processor_id(); + /* + * we have to reset the cppr index to 0 because we're + * not going to return from the IPI + */ + os_cppr->index = 0; xics_set_cpu_priority(0); /* Clear any pending IPI request */ |