summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig9
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c67
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S3
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c61
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c16
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c8
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c329
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h3
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c71
-rw-r--r--arch/powerpc/platforms/pseries/setup.c109
-rw-r--r--arch/powerpc/platforms/pseries/smp.c3
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c33
14 files changed, 527 insertions, 190 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index c81f6bb9c10..ae7b6d41fed 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -120,3 +120,12 @@ config DTL
which are accessible through a debugfs file.
Say N if you are unsure.
+
+config PSERIES_IDLE
+ tristate "Cpuidle driver for pSeries platforms"
+ depends on CPU_IDLE
+ depends on PPC_PSERIES
+ default y
+ help
+ Select this option to enable processor idle state management
+ through cpuidle subsystem.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 3556e402cbf..236db46b407 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DTL) += dtl.o
obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
+obj-$(CONFIG_PSERIES_IDLE) += processor_idle.o
ifeq ($(CONFIG_PPC_PSERIES),y)
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 3cafc306b97..c638535753d 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -33,7 +33,7 @@
#include <linux/sched.h>
#include <linux/stringify.h>
#include <linux/swap.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/mmu.h>
@@ -65,7 +65,7 @@ static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
-static struct sys_device cmm_sysdev;
+static struct device cmm_dev;
MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
@@ -347,25 +347,25 @@ static int cmm_thread(void *dummy)
}
#define CMM_SHOW(name, format, args...) \
- static ssize_t show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
+ static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
- static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
-static ssize_t show_oom_pages(struct sys_device *dev,
- struct sysdev_attribute *attr, char *buf)
+static ssize_t show_oom_pages(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
}
-static ssize_t store_oom_pages(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t store_oom_pages(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
unsigned long val = simple_strtoul (buf, NULL, 10);
@@ -379,17 +379,18 @@ static ssize_t store_oom_pages(struct sys_device *dev,
return count;
}
-static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
+static DEVICE_ATTR(oom_freed_kb, S_IWUSR | S_IRUGO,
show_oom_pages, store_oom_pages);
-static struct sysdev_attribute *cmm_attrs[] = {
- &attr_loaned_kb,
- &attr_loaned_target_kb,
- &attr_oom_freed_kb,
+static struct device_attribute *cmm_attrs[] = {
+ &dev_attr_loaned_kb,
+ &dev_attr_loaned_target_kb,
+ &dev_attr_oom_freed_kb,
};
-static struct sysdev_class cmm_sysdev_class = {
+static struct bus_type cmm_subsys = {
.name = "cmm",
+ .dev_name = "cmm",
};
/**
@@ -398,21 +399,21 @@ static struct sysdev_class cmm_sysdev_class = {
* Return value:
* 0 on success / other on failure
**/
-static int cmm_sysfs_register(struct sys_device *sysdev)
+static int cmm_sysfs_register(struct device *dev)
{
int i, rc;
- if ((rc = sysdev_class_register(&cmm_sysdev_class)))
+ if ((rc = subsys_system_register(&cmm_subsys, NULL)))
return rc;
- sysdev->id = 0;
- sysdev->cls = &cmm_sysdev_class;
+ dev->id = 0;
+ dev->bus = &cmm_subsys;
- if ((rc = sysdev_register(sysdev)))
- goto class_unregister;
+ if ((rc = device_register(dev)))
+ goto subsys_unregister;
for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
- if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
+ if ((rc = device_create_file(dev, cmm_attrs[i])))
goto fail;
}
@@ -420,10 +421,10 @@ static int cmm_sysfs_register(struct sys_device *sysdev)
fail:
while (--i >= 0)
- sysdev_remove_file(sysdev, cmm_attrs[i]);
- sysdev_unregister(sysdev);
-class_unregister:
- sysdev_class_unregister(&cmm_sysdev_class);
+ device_remove_file(dev, cmm_attrs[i]);
+ device_unregister(dev);
+subsys_unregister:
+ bus_unregister(&cmm_subsys);
return rc;
}
@@ -431,14 +432,14 @@ class_unregister:
* cmm_unregister_sysfs - Unregister from sysfs
*
**/
-static void cmm_unregister_sysfs(struct sys_device *sysdev)
+static void cmm_unregister_sysfs(struct device *dev)
{
int i;
for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
- sysdev_remove_file(sysdev, cmm_attrs[i]);
- sysdev_unregister(sysdev);
- sysdev_class_unregister(&cmm_sysdev_class);
+ device_remove_file(dev, cmm_attrs[i]);
+ device_unregister(dev);
+ bus_unregister(&cmm_subsys);
}
/**
@@ -657,7 +658,7 @@ static int cmm_init(void)
if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
goto out_oom_notifier;
- if ((rc = cmm_sysfs_register(&cmm_sysdev)))
+ if ((rc = cmm_sysfs_register(&cmm_dev)))
goto out_reboot_notifier;
if (register_memory_notifier(&cmm_mem_nb) ||
@@ -678,7 +679,7 @@ static int cmm_init(void)
out_unregister_notifier:
unregister_memory_notifier(&cmm_mem_nb);
unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
- cmm_unregister_sysfs(&cmm_sysdev);
+ cmm_unregister_sysfs(&cmm_dev);
out_reboot_notifier:
unregister_reboot_notifier(&cmm_reboot_nb);
out_oom_notifier:
@@ -701,7 +702,7 @@ static void cmm_exit(void)
unregister_memory_notifier(&cmm_mem_nb);
unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
cmm_free_pages(loaned_pages);
- cmm_unregister_sysfs(&cmm_sysdev);
+ cmm_unregister_sysfs(&cmm_dev);
}
/**
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index fd05fdee576..3ce73d0052b 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -36,6 +36,7 @@ BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
ld r12,hcall_tracepoint_refcount@toc(r2); \
+ std r12,32(r1); \
cmpdi r12,0; \
beq+ 1f; \
mflr r0; \
@@ -74,7 +75,7 @@ END_FTR_SECTION(0, 1); \
BEGIN_FTR_SECTION; \
b 1f; \
END_FTR_SECTION(0, 1); \
- ld r12,hcall_tracepoint_refcount@toc(r2); \
+ ld r12,32(r1); \
cmpdi r12,0; \
beq+ 1f; \
mflr r0; \
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index f106662f438..c9311cfdfca 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -109,7 +109,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long
if (opcode > MAX_HCALL_OPCODE)
return;
- h = &get_cpu_var(hcall_stats)[opcode / 4];
+ h = &__get_cpu_var(hcall_stats)[opcode / 4];
h->tb_start = mftb();
h->purr_start = mfspr(SPRN_PURR);
}
@@ -126,8 +126,6 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long
h->num_calls++;
h->tb_total += mftb() - h->tb_start;
h->purr_total += mfspr(SPRN_PURR) - h->purr_start;
-
- put_cpu_var(hcall_stats);
}
static int __init hcall_inst_init(void)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index b719d970973..c442f2b1980 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -52,13 +52,42 @@
#include "plpar_wrappers.h"
+static void tce_invalidate_pSeries_sw(struct iommu_table *tbl,
+ u64 *startp, u64 *endp)
+{
+ u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+ unsigned long start, end, inc;
+
+ start = __pa(startp);
+ end = __pa(endp);
+ inc = L1_CACHE_BYTES; /* invalidate a cacheline of TCEs at a time */
+
+ /* If this is non-zero, change the format. We shift the
+ * address and or in the magic from the device tree. */
+ if (tbl->it_busno) {
+ start <<= 12;
+ end <<= 12;
+ inc <<= 12;
+ start |= tbl->it_busno;
+ end |= tbl->it_busno;
+ }
+
+ end |= inc - 1; /* round up end to be different than start */
+
+ mb(); /* Make sure TCEs in memory are written */
+ while (start <= end) {
+ out_be64(invalidate, start);
+ start += inc;
+ }
+}
+
static int tce_build_pSeries(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
u64 proto_tce;
- u64 *tcep;
+ u64 *tcep, *tces;
u64 rpn;
proto_tce = TCE_PCI_READ; // Read allowed
@@ -66,7 +95,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
- tcep = ((u64 *)tbl->it_base) + index;
+ tces = tcep = ((u64 *)tbl->it_base) + index;
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
@@ -76,18 +105,24 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
uaddr += TCE_PAGE_SIZE;
tcep++;
}
+
+ if (tbl->it_type == TCE_PCI_SWINV_CREATE)
+ tce_invalidate_pSeries_sw(tbl, tces, tcep - 1);
return 0;
}
static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
- u64 *tcep;
+ u64 *tcep, *tces;
- tcep = ((u64 *)tbl->it_base) + index;
+ tces = tcep = ((u64 *)tbl->it_base) + index;
while (npages--)
*(tcep++) = 0;
+
+ if (tbl->it_type == TCE_PCI_SWINV_FREE)
+ tce_invalidate_pSeries_sw(tbl, tces, tcep - 1);
}
static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
@@ -425,7 +460,7 @@ static void iommu_table_setparms(struct pci_controller *phb,
struct iommu_table *tbl)
{
struct device_node *node;
- const unsigned long *basep;
+ const unsigned long *basep, *sw_inval;
const u32 *sizep;
node = phb->dn;
@@ -462,6 +497,22 @@ static void iommu_table_setparms(struct pci_controller *phb,
tbl->it_index = 0;
tbl->it_blocksize = 16;
tbl->it_type = TCE_PCI;
+
+ sw_inval = of_get_property(node, "linux,tce-sw-invalidate-info", NULL);
+ if (sw_inval) {
+ /*
+ * This property contains information on how to
+ * invalidate the TCE entry. The first property is
+ * the base MMIO address used to invalidate entries.
+ * The second property tells us the format of the TCE
+ * invalidate (whether it needs to be shifted) and
+ * some magic routing info to add to our invalidate
+ * command.
+ */
+ tbl->it_index = (unsigned long) ioremap(sw_inval[0], 8);
+ tbl->it_busno = sw_inval[1]; /* overload this with magic */
+ tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+ }
}
/*
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 52d429be6c7..7bc73af6c7b 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -546,6 +546,13 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
unsigned long flags;
unsigned int *depth;
+ /*
+ * We cannot call tracepoints inside RCU idle regions which
+ * means we must not trace H_CEDE.
+ */
+ if (opcode == H_CEDE)
+ return;
+
local_irq_save(flags);
depth = &__get_cpu_var(hcall_trace_depth);
@@ -554,9 +561,8 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
goto out;
(*depth)++;
+ preempt_disable();
trace_hcall_entry(opcode, args);
- if (opcode == H_CEDE)
- rcu_idle_enter();
(*depth)--;
out:
@@ -569,6 +575,9 @@ void __trace_hcall_exit(long opcode, unsigned long retval,
unsigned long flags;
unsigned int *depth;
+ if (opcode == H_CEDE)
+ return;
+
local_irq_save(flags);
depth = &__get_cpu_var(hcall_trace_depth);
@@ -577,9 +586,8 @@ void __trace_hcall_exit(long opcode, unsigned long retval,
goto out;
(*depth)++;
- if (opcode == H_CEDE)
- rcu_idle_exit();
trace_hcall_exit(opcode, retval, retbuf);
+ preempt_enable();
(*depth)--;
out:
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index a76b22844d1..36f957f3184 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -625,6 +625,8 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
{
static unsigned int oops_count = 0;
static bool panicking = false;
+ static DEFINE_SPINLOCK(lock);
+ unsigned long flags;
size_t text_len;
unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
int rc = -1;
@@ -636,7 +638,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
/* These are almost always orderly shutdowns. */
return;
case KMSG_DUMP_OOPS:
- case KMSG_DUMP_KEXEC:
break;
case KMSG_DUMP_PANIC:
panicking = true;
@@ -655,6 +656,9 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
if (clobbering_unread_rtas_event())
return;
+ if (!spin_trylock_irqsave(&lock, flags))
+ return;
+
if (big_oops_buf) {
text_len = capture_last_msgs(old_msgs, old_len,
new_msgs, new_len, big_oops_buf, big_oops_buf_sz);
@@ -670,4 +674,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
(int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count);
+
+ spin_unlock_irqrestore(&lock, flags);
}
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
new file mode 100644
index 00000000000..085fd3f45ad
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -0,0 +1,329 @@
+/*
+ * processor_idle - idle state cpuidle driver.
+ * Adapted from drivers/idle/intel_idle.c and
+ * drivers/acpi/processor_idle.c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu.h>
+
+#include <asm/paca.h>
+#include <asm/reg.h>
+#include <asm/system.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "plpar_wrappers.h"
+#include "pseries.h"
+
+struct cpuidle_driver pseries_idle_driver = {
+ .name = "pseries_idle",
+ .owner = THIS_MODULE,
+};
+
+#define MAX_IDLE_STATE_COUNT 2
+
+static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
+static struct cpuidle_device __percpu *pseries_cpuidle_devices;
+static struct cpuidle_state *cpuidle_state_table;
+
+void update_smt_snooze_delay(int snooze)
+{
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ if (drv)
+ drv->states[0].target_residency = snooze;
+}
+
+static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before)
+{
+
+ *kt_before = ktime_get_real();
+ *in_purr = mfspr(SPRN_PURR);
+ /*
+ * Indicate to the HV that we are idle. Now would be
+ * a good time to find other work to dispatch.
+ */
+ get_lppaca()->idle = 1;
+}
+
+static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before)
+{
+ get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr;
+ get_lppaca()->idle = 0;
+
+ return ktime_to_us(ktime_sub(ktime_get_real(), kt_before));
+}
+
+static int snooze_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long in_purr;
+ ktime_t kt_before;
+ unsigned long start_snooze;
+ long snooze = drv->states[0].target_residency;
+
+ idle_loop_prolog(&in_purr, &kt_before);
+
+ if (snooze) {
+ start_snooze = get_tb() + snooze * tb_ticks_per_usec;
+ local_irq_enable();
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ while ((snooze < 0) || (get_tb() < start_snooze)) {
+ if (need_resched() || cpu_is_offline(dev->cpu))
+ goto out;
+ ppc64_runlatch_off();
+ HMT_low();
+ HMT_very_low();
+ }
+
+ HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
+ local_irq_disable();
+ }
+
+out:
+ HMT_medium();
+ dev->last_residency =
+ (int)idle_loop_epilog(in_purr, kt_before);
+ return index;
+}
+
+static int dedicated_cede_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long in_purr;
+ ktime_t kt_before;
+
+ idle_loop_prolog(&in_purr, &kt_before);
+ get_lppaca()->donate_dedicated_cpu = 1;
+
+ ppc64_runlatch_off();
+ HMT_medium();
+ cede_processor();
+
+ get_lppaca()->donate_dedicated_cpu = 0;
+ dev->last_residency =
+ (int)idle_loop_epilog(in_purr, kt_before);
+ return index;
+}
+
+static int shared_cede_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long in_purr;
+ ktime_t kt_before;
+
+ idle_loop_prolog(&in_purr, &kt_before);
+
+ /*
+ * Yield the processor to the hypervisor. We return if
+ * an external interrupt occurs (which are driven prior
+ * to returning here) or if a prod occurs from another
+ * processor. When returning here, external interrupts
+ * are enabled.
+ */
+ cede_processor();
+
+ dev->last_residency =
+ (int)idle_loop_epilog(in_purr, kt_before);
+ return index;
+}
+
+/*
+ * States for dedicated partition case.
+ */
+static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
+ { /* Snooze */
+ .name = "snooze",
+ .desc = "snooze",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &snooze_loop },
+ { /* CEDE */
+ .name = "CEDE",
+ .desc = "CEDE",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 1,
+ .target_residency = 10,
+ .enter = &dedicated_cede_loop },
+};
+
+/*
+ * States for shared partition case.
+ */
+static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
+ { /* Shared Cede */
+ .name = "Shared Cede",
+ .desc = "Shared Cede",
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &shared_cede_loop },
+};
+
+int pseries_notify_cpuidle_add_cpu(int cpu)
+{
+ struct cpuidle_device *dev =
+ per_cpu_ptr(pseries_cpuidle_devices, cpu);
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_disable_device(dev);
+ cpuidle_enable_device(dev);
+ }
+ return 0;
+}
+
+/*
+ * pseries_cpuidle_driver_init()
+ */
+static int pseries_cpuidle_driver_init(void)
+{
+ int idle_state;
+ struct cpuidle_driver *drv = &pseries_idle_driver;
+
+ drv->state_count = 0;
+
+ for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
+
+ if (idle_state > max_idle_state)
+ break;
+
+ /* is the state not enabled? */
+ if (cpuidle_state_table[idle_state].enter == NULL)
+ continue;
+
+ drv->states[drv->state_count] = /* structure copy */
+ cpuidle_state_table[idle_state];
+
+ if (cpuidle_state_table == dedicated_states)
+ drv->states[drv->state_count].target_residency =
+ __get_cpu_var(smt_snooze_delay);
+
+ drv->state_count += 1;
+ }
+
+ return 0;
+}
+
+/* pseries_idle_devices_uninit(void)
+ * unregister cpuidle devices and de-allocate memory
+ */
+static void pseries_idle_devices_uninit(void)
+{
+ int i;
+ struct cpuidle_device *dev;
+
+ for_each_possible_cpu(i) {
+ dev = per_cpu_ptr(pseries_cpuidle_devices, i);
+ cpuidle_unregister_device(dev);
+ }
+
+ free_percpu(pseries_cpuidle_devices);
+ return;
+}
+
+/* pseries_idle_devices_init()
+ * allocate, initialize and register cpuidle device
+ */
+static int pseries_idle_devices_init(void)
+{
+ int i;
+ struct cpuidle_driver *drv = &pseries_idle_driver;
+ struct cpuidle_device *dev;
+
+ pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+ if (pseries_cpuidle_devices == NULL)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ dev = per_cpu_ptr(pseries_cpuidle_devices, i);
+ dev->state_count = drv->state_count;
+ dev->cpu = i;
+ if (cpuidle_register_device(dev)) {
+ printk(KERN_DEBUG \
+ "cpuidle_register_device %d failed!\n", i);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * pseries_idle_probe()
+ * Choose state table for shared versus dedicated partition
+ */
+static int pseries_idle_probe(void)
+{
+
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return -ENODEV;
+
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ return -ENODEV;
+
+ if (max_idle_state == 0) {
+ printk(KERN_DEBUG "pseries processor idle disabled.\n");
+ return -EPERM;
+ }
+
+ if (get_lppaca()->shared_proc)
+ cpuidle_state_table = shared_states;
+ else
+ cpuidle_state_table = dedicated_states;
+
+ return 0;
+}
+
+static int __init pseries_processor_idle_init(void)
+{
+ int retval;
+
+ retval = pseries_idle_probe();
+ if (retval)
+ return retval;
+
+ pseries_cpuidle_driver_init();
+ retval = cpuidle_register_driver(&pseries_idle_driver);
+ if (retval) {
+ printk(KERN_DEBUG "Registration of pseries driver failed.\n");
+ return retval;
+ }
+
+ retval = pseries_idle_devices_init();
+ if (retval) {
+ pseries_idle_devices_uninit();
+ cpuidle_unregister_driver(&pseries_idle_driver);
+ return retval;
+ }
+
+ printk(KERN_DEBUG "pseries_idle_driver registered\n");
+
+ return 0;
+}
+
+static void __exit pseries_processor_idle_exit(void)
+{
+
+ pseries_idle_devices_uninit();
+ cpuidle_unregister_driver(&pseries_idle_driver);
+
+ return;
+}
+
+module_init(pseries_processor_idle_init);
+module_exit(pseries_processor_idle_exit);
+
+MODULE_AUTHOR("Deepthi Dharwar <deepthi@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("Cpuidle driver for POWER");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 24c7162f11d..9a3dda07566 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -57,4 +57,7 @@ extern struct device_node *dlpar_configure_connector(u32);
extern int dlpar_attach_node(struct device_node *);
extern int dlpar_detach_node(struct device_node *);
+/* Snooze Delay, pseries_idle */
+DECLARE_PER_CPU(long, smt_snooze_delay);
+
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index c8b3c69fe89..af281dce510 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -15,7 +15,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/seq_file.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <asm/cputhreads.h>
@@ -184,7 +184,7 @@ static ssize_t get_best_energy_list(char *page, int activate)
return s-page;
}
-static ssize_t get_best_energy_data(struct sys_device *dev,
+static ssize_t get_best_energy_data(struct device *dev,
char *page, int activate)
{
int rc;
@@ -207,26 +207,26 @@ static ssize_t get_best_energy_data(struct sys_device *dev,
/* Wrapper functions */
-static ssize_t cpu_activate_hint_list_show(struct sysdev_class *class,
- struct sysdev_class_attribute *attr, char *page)
+static ssize_t cpu_activate_hint_list_show(struct device *dev,
+ struct device_attribute *attr, char *page)
{
return get_best_energy_list(page, 1);
}
-static ssize_t cpu_deactivate_hint_list_show(struct sysdev_class *class,
- struct sysdev_class_attribute *attr, char *page)
+static ssize_t cpu_deactivate_hint_list_show(struct device *dev,
+ struct device_attribute *attr, char *page)
{
return get_best_energy_list(page, 0);
}
-static ssize_t percpu_activate_hint_show(struct sys_device *dev,
- struct sysdev_attribute *attr, char *page)
+static ssize_t percpu_activate_hint_show(struct device *dev,
+ struct device_attribute *attr, char *page)
{
return get_best_energy_data(dev, page, 1);
}
-static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
- struct sysdev_attribute *attr, char *page)
+static ssize_t percpu_deactivate_hint_show(struct device *dev,
+ struct device_attribute *attr, char *page)
{
return get_best_energy_data(dev, page, 0);
}
@@ -241,48 +241,48 @@ static ssize_t percpu_deactivate_hint_show(struct sys_device *dev,
* Per-cpu value of the hint
*/
-struct sysdev_class_attribute attr_cpu_activate_hint_list =
- _SYSDEV_CLASS_ATTR(pseries_activate_hint_list, 0444,
+struct device_attribute attr_cpu_activate_hint_list =
+ __ATTR(pseries_activate_hint_list, 0444,
cpu_activate_hint_list_show, NULL);
-struct sysdev_class_attribute attr_cpu_deactivate_hint_list =
- _SYSDEV_CLASS_ATTR(pseries_deactivate_hint_list, 0444,
+struct device_attribute attr_cpu_deactivate_hint_list =
+ __ATTR(pseries_deactivate_hint_list, 0444,
cpu_deactivate_hint_list_show, NULL);
-struct sysdev_attribute attr_percpu_activate_hint =
- _SYSDEV_ATTR(pseries_activate_hint, 0444,
+struct device_attribute attr_percpu_activate_hint =
+ __ATTR(pseries_activate_hint, 0444,
percpu_activate_hint_show, NULL);
-struct sysdev_attribute attr_percpu_deactivate_hint =
- _SYSDEV_ATTR(pseries_deactivate_hint, 0444,
+struct device_attribute attr_percpu_deactivate_hint =
+ __ATTR(pseries_deactivate_hint, 0444,
percpu_deactivate_hint_show, NULL);
static int __init pseries_energy_init(void)
{
int cpu, err;
- struct sys_device *cpu_sys_dev;
+ struct device *cpu_dev;
if (!check_for_h_best_energy()) {
printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n");
return 0;
}
/* Create the sysfs files */
- err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
- &attr_cpu_activate_hint_list.attr);
+ err = device_create_file(cpu_subsys.dev_root,
+ &attr_cpu_activate_hint_list);
if (!err)
- err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
- &attr_cpu_deactivate_hint_list.attr);
+ err = device_create_file(cpu_subsys.dev_root,
+ &attr_cpu_deactivate_hint_list);
if (err)
return err;
for_each_possible_cpu(cpu) {
- cpu_sys_dev = get_cpu_sysdev(cpu);
- err = sysfs_create_file(&cpu_sys_dev->kobj,
- &attr_percpu_activate_hint.attr);
+ cpu_dev = get_cpu_device(cpu);
+ err = device_create_file(cpu_dev,
+ &attr_percpu_activate_hint);
if (err)
break;
- err = sysfs_create_file(&cpu_sys_dev->kobj,
- &attr_percpu_deactivate_hint.attr);
+ err = device_create_file(cpu_dev,
+ &attr_percpu_deactivate_hint);
if (err)
break;
}
@@ -298,23 +298,20 @@ static int __init pseries_energy_init(void)
static void __exit pseries_energy_cleanup(void)
{
int cpu;
- struct sys_device *cpu_sys_dev;
+ struct device *cpu_dev;
if (!sysfs_entries)
return;
/* Remove the sysfs files */
- sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
- &attr_cpu_activate_hint_list.attr);
-
- sysfs_remove_file(&cpu_sysdev_class.kset.kobj,
- &attr_cpu_deactivate_hint_list.attr);
+ device_remove_file(cpu_subsys.dev_root, &attr_cpu_activate_hint_list);
+ device_remove_file(cpu_subsys.dev_root, &attr_cpu_deactivate_hint_list);
for_each_possible_cpu(cpu) {
- cpu_sys_dev = get_cpu_sysdev(cpu);
- sysfs_remove_file(&cpu_sys_dev->kobj,
+ cpu_dev = get_cpu_device(cpu);
+ sysfs_remove_file(&cpu_dev->kobj,
&attr_percpu_activate_hint.attr);
- sysfs_remove_file(&cpu_sys_dev->kobj,
+ sysfs_remove_file(&cpu_dev->kobj,
&attr_percpu_deactivate_hint.attr);
}
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index c3408ca8855..f79f1278dfc 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -39,6 +39,7 @@
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
+#include <linux/cpuidle.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -74,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
int fwnmi_active; /* TRUE if an FWNMI handler is present */
-static void pseries_shared_idle_sleep(void);
-static void pseries_dedicated_idle_sleep(void);
-
static struct device_node *pSeries_mpic_node;
static void pSeries_show_cpuinfo(struct seq_file *m)
@@ -192,8 +190,7 @@ static void __init pseries_mpic_init_IRQ(void)
BUG_ON(openpic_addr == 0);
/* Setup the openpic driver */
- mpic = mpic_alloc(pSeries_mpic_node, openpic_addr,
- MPIC_PRIMARY,
+ mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, 0,
16, 250, /* isu size, irq count */
" MPIC ");
BUG_ON(mpic == NULL);
@@ -352,8 +349,25 @@ static int alloc_dispatch_log_kmem_cache(void)
}
early_initcall(alloc_dispatch_log_kmem_cache);
+static void pSeries_idle(void)
+{
+ /* This would call on the cpuidle framework, and the back-end pseries
+ * driver to go to idle states
+ */
+ if (cpuidle_idle_call()) {
+ /* On error, execute default handler
+ * to go into low thread priority and possibly
+ * low power mode.
+ */
+ HMT_low();
+ HMT_very_low();
+ }
+}
+
static void __init pSeries_setup_arch(void)
{
+ panic_timeout = 10;
+
/* Discover PIC type and setup ppc_md accordingly */
pseries_discover_pic();
@@ -374,18 +388,9 @@ static void __init pSeries_setup_arch(void)
pSeries_nvram_init();
- /* Choose an idle loop */
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
vpa_init(boot_cpuid);
- if (get_lppaca()->shared_proc) {
- printk(KERN_DEBUG "Using shared processor idle loop\n");
- ppc_md.power_save = pseries_shared_idle_sleep;
- } else {
- printk(KERN_DEBUG "Using dedicated idle loop\n");
- ppc_md.power_save = pseries_dedicated_idle_sleep;
- }
- } else {
- printk(KERN_DEBUG "Using default idle loop\n");
+ ppc_md.power_save = pSeries_idle;
}
if (firmware_has_feature(FW_FEATURE_LPAR))
@@ -586,80 +591,6 @@ static int __init pSeries_probe(void)
return 1;
}
-
-DECLARE_PER_CPU(long, smt_snooze_delay);
-
-static void pseries_dedicated_idle_sleep(void)
-{
- unsigned int cpu = smp_processor_id();
- unsigned long start_snooze;
- unsigned long in_purr, out_purr;
- long snooze = __get_cpu_var(smt_snooze_delay);
-
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
- get_lppaca()->donate_dedicated_cpu = 1;
- in_purr = mfspr(SPRN_PURR);
-
- /*
- * We come in with interrupts disabled, and need_resched()
- * has been checked recently. If we should poll for a little
- * while, do so.
- */
- if (snooze) {
- start_snooze = get_tb() + snooze * tb_ticks_per_usec;
- local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
-
- while ((snooze < 0) || (get_tb() < start_snooze)) {
- if (need_resched() || cpu_is_offline(cpu))
- goto out;
- ppc64_runlatch_off();
- HMT_low();
- HMT_very_low();
- }
-
- HMT_medium();
- clear_thread_flag(TIF_POLLING_NRFLAG);
- smp_mb();
- local_irq_disable();
- if (need_resched() || cpu_is_offline(cpu))
- goto out;
- }
-
- cede_processor();
-
-out:
- HMT_medium();
- out_purr = mfspr(SPRN_PURR);
- get_lppaca()->wait_state_cycles += out_purr - in_purr;
- get_lppaca()->donate_dedicated_cpu = 0;
- get_lppaca()->idle = 0;
-}
-
-static void pseries_shared_idle_sleep(void)
-{
- /*
- * Indicate to the HV that we are idle. Now would be
- * a good time to find other work to dispatch.
- */
- get_lppaca()->idle = 1;
-
- /*
- * Yield the processor to the hypervisor. We return if
- * an external interrupt occurs (which are driven prior
- * to returning here) or if a prod occurs from another
- * processor. When returning here, external interrupts
- * are enabled.
- */
- cede_processor();
-
- get_lppaca()->idle = 0;
-}
-
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
if (firmware_has_feature(FW_FEATURE_LPAR))
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 26e93fd4c62..eadba9521a3 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -22,7 +22,7 @@
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/err.h>
-#include <linux/sysdev.h>
+#include <linux/device.h>
#include <linux/cpu.h>
#include <asm/ptrace.h>
@@ -148,6 +148,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
set_cpu_current_state(cpu, CPU_STATE_ONLINE);
set_default_offline_state(cpu);
#endif
+ pseries_notify_cpuidle_add_cpu(cpu);
}
static int __devinit smp_pSeries_kick_cpu(int nr)
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index d3de0849f29..b84a8b2238d 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -26,7 +26,7 @@
#include <asm/rtas.h>
static u64 stream_id;
-static struct sys_device suspend_sysdev;
+static struct device suspend_dev;
static DECLARE_COMPLETION(suspend_work);
static struct rtas_suspend_me_data suspend_data;
static atomic_t suspending;
@@ -110,8 +110,8 @@ static int pseries_prepare_late(void)
/**
* store_hibernate - Initiate partition hibernation
- * @classdev: sysdev class struct
- * @attr: class device attribute struct
+ * @dev: subsys root device
+ * @attr: device attribute struct
* @buf: buffer
* @count: buffer size
*
@@ -121,8 +121,8 @@ static int pseries_prepare_late(void)
* Return value:
* number of bytes printed to buffer / other on failure
**/
-static ssize_t store_hibernate(struct sysdev_class *classdev,
- struct sysdev_class_attribute *attr,
+static ssize_t store_hibernate(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
@@ -148,10 +148,11 @@ static ssize_t store_hibernate(struct sysdev_class *classdev,
return rc;
}
-static SYSDEV_CLASS_ATTR(hibernate, S_IWUSR, NULL, store_hibernate);
+static DEVICE_ATTR(hibernate, S_IWUSR, NULL, store_hibernate);
-static struct sysdev_class suspend_sysdev_class = {
+static struct bus_type suspend_subsys = {
.name = "power",
+ .dev_name = "power",
};
static const struct platform_suspend_ops pseries_suspend_ops = {
@@ -167,23 +168,23 @@ static const struct platform_suspend_ops pseries_suspend_ops = {
* Return value:
* 0 on success / other on failure
**/
-static int pseries_suspend_sysfs_register(struct sys_device *sysdev)
+static int pseries_suspend_sysfs_register(struct device *dev)
{
int rc;
- if ((rc = sysdev_class_register(&suspend_sysdev_class)))
+ if ((rc = subsys_system_register(&suspend_subsys, NULL)))
return rc;
- sysdev->id = 0;
- sysdev->cls = &suspend_sysdev_class;
+ dev->id = 0;
+ dev->bus = &suspend_subsys;
- if ((rc = sysdev_class_create_file(&suspend_sysdev_class, &attr_hibernate)))
- goto class_unregister;
+ if ((rc = device_create_file(suspend_subsys.dev_root, &dev_attr_hibernate)))
+ goto subsys_unregister;
return 0;
-class_unregister:
- sysdev_class_unregister(&suspend_sysdev_class);
+subsys_unregister:
+ bus_unregister(&suspend_subsys);
return rc;
}
@@ -204,7 +205,7 @@ static int __init pseries_suspend_init(void)
if (suspend_data.token == RTAS_UNKNOWN_SERVICE)
return 0;
- if ((rc = pseries_suspend_sysfs_register(&suspend_sysdev)))
+ if ((rc = pseries_suspend_sysfs_register(&suspend_dev)))
return rc;
ppc_md.suspend_disable_cpu = pseries_suspend_cpu;