summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig2
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c254
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c16
-rw-r--r--arch/powerpc/platforms/pseries/smp.c4
-rw-r--r--arch/powerpc/platforms/pseries/xics.c28
5 files changed, 272 insertions, 32 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 27554c807fd..c667f0f02c3 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -2,6 +2,8 @@ config PPC_PSERIES
depends on PPC64 && PPC_BOOK3S
bool "IBM pSeries & new (POWER5-based) iSeries"
select MPIC
+ select PCI_MSI
+ select XICS
select PPC_I8259
select PPC_RTAS
select PPC_RTAS_DAEMON
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index bcdcf0ccc8d..a277f2e28db 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -38,19 +38,28 @@
#include <asm/mmu.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
+#include <linux/memory.h>
#include "plpar_wrappers.h"
#define CMM_DRIVER_VERSION "1.0.0"
#define CMM_DEFAULT_DELAY 1
+#define CMM_HOTPLUG_DELAY 5
#define CMM_DEBUG 0
#define CMM_DISABLE 0
#define CMM_OOM_KB 1024
#define CMM_MIN_MEM_MB 256
#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+/*
+ * The priority level tries to ensure that this notifier is called as
+ * late as possible to reduce thrashing in the shared memory pool.
+ */
+#define CMM_MEM_HOTPLUG_PRI 1
+#define CMM_MEM_ISOLATE_PRI 15
static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
@@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION);
module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
"[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
+ "before loaning resumes. "
+ "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
"[Default=" __stringify(CMM_OOM_KB) "]");
@@ -92,6 +105,9 @@ static unsigned long oom_freed_pages;
static struct cmm_page_array *cmm_page_list;
static DEFINE_SPINLOCK(cmm_lock);
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred; /* protected by the hotplug mutex */
+
static struct task_struct *cmm_thread_ptr;
/**
@@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr)
cmm_dbg("Begin request for %ld pages\n", nr);
while (nr) {
+ /* Exit if a hotplug operation is in progress or occurred */
+ if (mutex_trylock(&hotplug_mutex)) {
+ if (hotplug_occurred) {
+ mutex_unlock(&hotplug_mutex);
+ break;
+ }
+ mutex_unlock(&hotplug_mutex);
+ } else {
+ break;
+ }
+
addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
__GFP_NORETRY | __GFP_NOMEMALLOC);
if (!addr)
@@ -119,8 +146,9 @@ static long cmm_alloc_pages(long nr)
if (!pa || pa->index >= CMM_NR_PAGES) {
/* Need a new page for the page list. */
spin_unlock(&cmm_lock);
- npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_NOMEMALLOC);
+ npa = (struct cmm_page_array *)__get_free_page(
+ GFP_NOIO | __GFP_NOWARN |
+ __GFP_NORETRY | __GFP_NOMEMALLOC);
if (!npa) {
pr_info("%s: Can not allocate new page list\n", __func__);
free_page(addr);
@@ -282,9 +310,28 @@ static int cmm_thread(void *dummy)
while (1) {
timeleft = msleep_interruptible(delay * 1000);
- if (kthread_should_stop() || timeleft) {
- loaned_pages_target = loaned_pages;
+ if (kthread_should_stop() || timeleft)
break;
+
+ if (mutex_trylock(&hotplug_mutex)) {
+ if (hotplug_occurred) {
+ hotplug_occurred = 0;
+ mutex_unlock(&hotplug_mutex);
+ cmm_dbg("Hotplug operation has occurred, "
+ "loaning activity suspended "
+ "for %d seconds.\n",
+ hotplug_delay);
+ timeleft = msleep_interruptible(hotplug_delay *
+ 1000);
+ if (kthread_should_stop() || timeleft)
+ break;
+ continue;
+ }
+ mutex_unlock(&hotplug_mutex);
+ } else {
+ cmm_dbg("Hotplug operation in progress, activity "
+ "suspended\n");
+ continue;
}
cmm_get_mpp();
@@ -414,6 +461,193 @@ static struct notifier_block cmm_reboot_nb = {
};
/**
+ * cmm_count_pages - Count the number of pages loaned in a particular range.
+ *
+ * @arg: memory_isolate_notify structure with address range and count
+ *
+ * Return value:
+ * 0 on success
+ **/
+static unsigned long cmm_count_pages(void *arg)
+{
+ struct memory_isolate_notify *marg = arg;
+ struct cmm_page_array *pa;
+ unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+ unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
+ unsigned long idx;
+
+ spin_lock(&cmm_lock);
+ pa = cmm_page_list;
+ while (pa) {
+ if ((unsigned long)pa >= start && (unsigned long)pa < end)
+ marg->pages_found++;
+ for (idx = 0; idx < pa->index; idx++)
+ if (pa->page[idx] >= start && pa->page[idx] < end)
+ marg->pages_found++;
+ pa = pa->next;
+ }
+ spin_unlock(&cmm_lock);
+ return 0;
+}
+
+/**
+ * cmm_memory_isolate_cb - Handle memory isolation notifier calls
+ * @self: notifier block struct
+ * @action: action to take
+ * @arg: struct memory_isolate_notify data for handler
+ *
+ * Return value:
+ * NOTIFY_OK or notifier error based on subfunction return value
+ **/
+static int cmm_memory_isolate_cb(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ int ret = 0;
+
+ if (action == MEM_ISOLATE_COUNT)
+ ret = cmm_count_pages(arg);
+
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
+
+ return ret;
+}
+
+static struct notifier_block cmm_mem_isolate_nb = {
+ .notifier_call = cmm_memory_isolate_cb,
+ .priority = CMM_MEM_ISOLATE_PRI
+};
+
+/**
+ * cmm_mem_going_offline - Unloan pages where memory is to be removed
+ * @arg: memory_notify structure with page range to be offlined
+ *
+ * Return value:
+ * 0 on success
+ **/
+static int cmm_mem_going_offline(void *arg)
+{
+ struct memory_notify *marg = arg;
+ unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+ unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
+ struct cmm_page_array *pa_curr, *pa_last, *npa;
+ unsigned long idx;
+ unsigned long freed = 0;
+
+ cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
+ start_page, marg->nr_pages);
+ spin_lock(&cmm_lock);
+
+ /* Search the page list for pages in the range to be offlined */
+ pa_last = pa_curr = cmm_page_list;
+ while (pa_curr) {
+ for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
+ if ((pa_curr->page[idx] < start_page) ||
+ (pa_curr->page[idx] >= end_page))
+ continue;
+
+ plpar_page_set_active(__pa(pa_curr->page[idx]));
+ free_page(pa_curr->page[idx]);
+ freed++;
+ loaned_pages--;
+ totalram_pages++;
+ pa_curr->page[idx] = pa_last->page[--pa_last->index];
+ if (pa_last->index == 0) {
+ if (pa_curr == pa_last)
+ pa_curr = pa_last->next;
+ pa_last = pa_last->next;
+ free_page((unsigned long)cmm_page_list);
+ cmm_page_list = pa_last;
+ continue;
+ }
+ }
+ pa_curr = pa_curr->next;
+ }
+
+ /* Search for page list structures in the range to be offlined */
+ pa_last = NULL;
+ pa_curr = cmm_page_list;
+ while (pa_curr) {
+ if (((unsigned long)pa_curr >= start_page) &&
+ ((unsigned long)pa_curr < end_page)) {
+ npa = (struct cmm_page_array *)__get_free_page(
+ GFP_NOIO | __GFP_NOWARN |
+ __GFP_NORETRY | __GFP_NOMEMALLOC);
+ if (!npa) {
+ spin_unlock(&cmm_lock);
+ cmm_dbg("Failed to allocate memory for list "
+ "management. Memory hotplug "
+ "failed.\n");
+ return ENOMEM;
+ }
+ memcpy(npa, pa_curr, PAGE_SIZE);
+ if (pa_curr == cmm_page_list)
+ cmm_page_list = npa;
+ if (pa_last)
+ pa_last->next = npa;
+ free_page((unsigned long) pa_curr);
+ freed++;
+ pa_curr = npa;
+ }
+
+ pa_last = pa_curr;
+ pa_curr = pa_curr->next;
+ }
+
+ spin_unlock(&cmm_lock);
+ cmm_dbg("Released %ld pages in the search range.\n", freed);
+
+ return 0;
+}
+
+/**
+ * cmm_memory_cb - Handle memory hotplug notifier calls
+ * @self: notifier block struct
+ * @action: action to take
+ * @arg: struct memory_notify data for handler
+ *
+ * Return value:
+ * NOTIFY_OK or notifier error based on subfunction return value
+ *
+ **/
+static int cmm_memory_cb(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ int ret = 0;
+
+ switch (action) {
+ case MEM_GOING_OFFLINE:
+ mutex_lock(&hotplug_mutex);
+ hotplug_occurred = 1;
+ ret = cmm_mem_going_offline(arg);
+ break;
+ case MEM_OFFLINE:
+ case MEM_CANCEL_OFFLINE:
+ mutex_unlock(&hotplug_mutex);
+ cmm_dbg("Memory offline operation complete.\n");
+ break;
+ case MEM_GOING_ONLINE:
+ case MEM_ONLINE:
+ case MEM_CANCEL_ONLINE:
+ break;
+ }
+
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
+
+ return ret;
+}
+
+static struct notifier_block cmm_mem_nb = {
+ .notifier_call = cmm_memory_cb,
+ .priority = CMM_MEM_HOTPLUG_PRI
+};
+
+/**
* cmm_init - Module initialization
*
* Return value:
@@ -435,18 +669,24 @@ static int cmm_init(void)
if ((rc = cmm_sysfs_register(&cmm_sysdev)))
goto out_reboot_notifier;
+ if (register_memory_notifier(&cmm_mem_nb) ||
+ register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+ goto out_unregister_notifier;
+
if (cmm_disabled)
return rc;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (IS_ERR(cmm_thread_ptr)) {
rc = PTR_ERR(cmm_thread_ptr);
- goto out_unregister_sysfs;
+ goto out_unregister_notifier;
}
return rc;
-out_unregister_sysfs:
+out_unregister_notifier:
+ unregister_memory_notifier(&cmm_mem_nb);
+ unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
cmm_unregister_sysfs(&cmm_sysdev);
out_reboot_notifier:
unregister_reboot_notifier(&cmm_reboot_nb);
@@ -467,6 +707,8 @@ static void cmm_exit(void)
kthread_stop(cmm_thread_ptr);
unregister_oom_notifier(&cmm_oom_nb);
unregister_reboot_notifier(&cmm_reboot_nb);
+ unregister_memory_notifier(&cmm_mem_nb);
+ unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
cmm_free_pages(loaned_pages);
cmm_unregister_sysfs(&cmm_sysdev);
}
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 12df9e8812a..37bce52526d 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -236,7 +236,9 @@ static struct device_node *derive_parent(const char *path)
int dlpar_attach_node(struct device_node *dn)
{
+#ifdef CONFIG_PROC_DEVICETREE
struct proc_dir_entry *ent;
+#endif
int rc;
of_node_set_flag(dn, OF_DYNAMIC);
@@ -267,10 +269,10 @@ int dlpar_attach_node(struct device_node *dn)
int dlpar_detach_node(struct device_node *dn)
{
+#ifdef CONFIG_PROC_DEVICETREE
struct device_node *parent = dn->parent;
struct property *prop = dn->properties;
-#ifdef CONFIG_PROC_DEVICETREE
while (prop) {
remove_proc_entry(prop->name, dn->pde);
prop = prop->next;
@@ -344,18 +346,6 @@ int dlpar_release_drc(u32 drc_index)
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-static DEFINE_MUTEX(pseries_cpu_hotplug_mutex);
-
-void cpu_hotplug_driver_lock()
-{
- mutex_lock(&pseries_cpu_hotplug_mutex);
-}
-
-void cpu_hotplug_driver_unlock()
-{
- mutex_unlock(&pseries_cpu_hotplug_mutex);
-}
-
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 8868c012268..b4886635972 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -144,8 +144,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
hcpuid = get_hard_smp_processor_id(nr);
rc = plpar_hcall_norets(H_PROD, hcpuid);
if (rc != H_SUCCESS)
- panic("Error: Prod to wake up processor %d Ret= %ld\n",
- nr, rc);
+ printk(KERN_ERR "Error: Prod to wake up processor %d\
+ Ret= %ld\n", nr, rc);
}
}
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index b9b9e11609e..f5f79196721 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -163,14 +163,13 @@ static inline void lpar_qirr_info(int n_cpu , u8 value)
/* Interface to generic irq subsystem */
#ifdef CONFIG_SMP
-static int get_irq_server(unsigned int virq, unsigned int strict_check)
+static int get_irq_server(unsigned int virq, cpumask_t cpumask,
+ unsigned int strict_check)
{
int server;
/* For the moment only implement delivery to all cpus or one cpu */
- cpumask_t cpumask;
cpumask_t tmp = CPU_MASK_NONE;
- cpumask_copy(&cpumask, irq_to_desc(virq)->affinity);
if (!distribute_irqs)
return default_server;
@@ -192,10 +191,7 @@ static int get_irq_server(unsigned int virq, unsigned int strict_check)
return default_server;
}
#else
-static int get_irq_server(unsigned int virq, unsigned int strict_check)
-{
- return default_server;
-}
+#define get_irq_server(virq, cpumask, strict_check) (default_server)
#endif
static void xics_unmask_irq(unsigned int virq)
@@ -211,7 +207,7 @@ static void xics_unmask_irq(unsigned int virq)
if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
return;
- server = get_irq_server(virq, 0);
+ server = get_irq_server(virq, *(irq_to_desc(virq)->affinity), 0);
call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
DEFAULT_PRIORITY);
@@ -405,7 +401,7 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
* For the moment only implement delivery to all cpus or one cpu.
* Get current irq_server for the given irq
*/
- irq_server = get_irq_server(virq, 1);
+ irq_server = get_irq_server(virq, *cpumask, 1);
if (irq_server == -1) {
char cpulist[128];
cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
@@ -788,9 +784,13 @@ static void xics_set_cpu_priority(unsigned char cppr)
{
struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
- BUG_ON(os_cppr->index != 0);
+ /*
+ * we only really want to set the priority when there's
+ * just one cppr value on the stack
+ */
+ WARN_ON(os_cppr->index != 0);
- os_cppr->stack[os_cppr->index] = cppr;
+ os_cppr->stack[0] = cppr;
if (firmware_has_feature(FW_FEATURE_LPAR))
lpar_cppr_info(cppr);
@@ -825,8 +825,14 @@ void xics_setup_cpu(void)
void xics_teardown_cpu(void)
{
+ struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
int cpu = smp_processor_id();
+ /*
+ * we have to reset the cppr index to 0 because we're
+ * not going to return from the IPI
+ */
+ os_cppr->index = 0;
xics_set_cpu_priority(0);
/* Clear any pending IPI request */