diff options
32 files changed, 2287 insertions, 305 deletions
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt new file mode 100644 index 00000000000..f49a33b704d --- /dev/null +++ b/Documentation/power/runtime_pm.txt @@ -0,0 +1,378 @@ +Run-time Power Management Framework for I/O Devices + +(C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + +1. Introduction + +Support for run-time power management (run-time PM) of I/O devices is provided +at the power management core (PM core) level by means of: + +* The power management workqueue pm_wq in which bus types and device drivers can + put their PM-related work items. It is strongly recommended that pm_wq be + used for queuing all work items related to run-time PM, because this allows + them to be synchronized with system-wide power transitions (suspend to RAM, + hibernation and resume from system sleep states). pm_wq is declared in + include/linux/pm_runtime.h and defined in kernel/power/main.c. + +* A number of run-time PM fields in the 'power' member of 'struct device' (which + is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can + be used for synchronizing run-time PM operations with one another. + +* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in + include/linux/pm.h). + +* A set of helper functions defined in drivers/base/power/runtime.c that can be + used for carrying out run-time PM operations in such a way that the + synchronization between them is taken care of by the PM core. Bus types and + device drivers are encouraged to use these functions. + +The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM +fields of 'struct dev_pm_info' and the core helper functions provided for +run-time PM are described below. + +2. Device Run-time PM Callbacks + +There are three device run-time PM callbacks defined in 'struct dev_pm_ops': + +struct dev_pm_ops { + ... + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + void (*runtime_idle)(struct device *dev); + ... +}; + +The ->runtime_suspend() callback is executed by the PM core for the bus type of +the device being suspended. The bus type's callback is then _entirely_ +_responsible_ for handling the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_suspend() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_suspend() +callback in a device driver as long as the bus type's ->runtime_suspend() knows +what to do to handle the device). + + * Once the bus type's ->runtime_suspend() callback has completed successfully + for given device, the PM core regards the device as suspended, which need + not mean that the device has been put into a low power state. It is + supposed to mean, however, that the device will not process data and will + not communicate with the CPU(s) and RAM until its bus type's + ->runtime_resume() callback is executed for it. The run-time PM status of + a device after successful execution of its bus type's ->runtime_suspend() + callback is 'suspended'. + + * If the bus type's ->runtime_suspend() callback returns -EBUSY or -EAGAIN, + the device's run-time PM status is supposed to be 'active', which means that + the device _must_ be fully operational afterwards. + + * If the bus type's ->runtime_suspend() callback returns an error code + different from -EBUSY or -EAGAIN, the PM core regards this as a fatal + error and will refuse to run the helper functions described in Section 4 + for the device, until the status of it is directly set either to 'active' + or to 'suspended' (the PM core provides special helper functions for this + purpose). + +In particular, if the driver requires remote wakeup capability for proper +functioning and device_may_wakeup() returns 'false' for the device, then +->runtime_suspend() should return -EBUSY. On the other hand, if +device_may_wakeup() returns 'true' for the device and the device is put +into a low power state during the execution of its bus type's +->runtime_suspend(), it is expected that remote wake-up (i.e. hardware mechanism +allowing the device to request a change of its power state, such as PCI PME) +will be enabled for the device. Generally, remote wake-up should be enabled +for all input devices put into a low power state at run time. + +The ->runtime_resume() callback is executed by the PM core for the bus type of +the device being woken up. The bus type's callback is then _entirely_ +_responsible_ for handling the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_resume() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_resume() +callback in a device driver as long as the bus type's ->runtime_resume() knows +what to do to handle the device). + + * Once the bus type's ->runtime_resume() callback has completed successfully, + the PM core regards the device as fully operational, which means that the + device _must_ be able to complete I/O operations as needed. The run-time + PM status of the device is then 'active'. + + * If the bus type's ->runtime_resume() callback returns an error code, the PM + core regards this as a fatal error and will refuse to run the helper + functions described in Section 4 for the device, until its status is + directly set either to 'active' or to 'suspended' (the PM core provides + special helper functions for this purpose). + +The ->runtime_idle() callback is executed by the PM core for the bus type of +given device whenever the device appears to be idle, which is indicated to the +PM core by two counters, the device's usage counter and the counter of 'active' +children of the device. + + * If any of these counters is decreased using a helper function provided by + the PM core and it turns out to be equal to zero, the other counter is + checked. If that counter also is equal to zero, the PM core executes the + device bus type's ->runtime_idle() callback (with the device as an + argument). + +The action performed by a bus type's ->runtime_idle() callback is totally +dependent on the bus type in question, but the expected and recommended action +is to check if the device can be suspended (i.e. if all of the conditions +necessary for suspending the device are satisfied) and to queue up a suspend +request for the device in that case. + +The helper functions provided by the PM core, described in Section 4, guarantee +that the following constraints are met with respect to the bus type's run-time +PM callbacks: + +(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute + ->runtime_suspend() in parallel with ->runtime_resume() or with another + instance of ->runtime_suspend() for the same device) with the exception that + ->runtime_suspend() or ->runtime_resume() can be executed in parallel with + ->runtime_idle() (although ->runtime_idle() will not be started while any + of the other callbacks is being executed for the same device). + +(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' + devices (i.e. the PM core will only execute ->runtime_idle() or + ->runtime_suspend() for the devices the run-time PM status of which is + 'active'). + +(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device + the usage counter of which is equal to zero _and_ either the counter of + 'active' children of which is equal to zero, or the 'power.ignore_children' + flag of which is set. + +(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the + PM core will only execute ->runtime_resume() for the devices the run-time + PM status of which is 'suspended'). + +Additionally, the helper functions provided by the PM core obey the following +rules: + + * If ->runtime_suspend() is about to be executed or there's a pending request + to execute it, ->runtime_idle() will not be executed for the same device. + + * A request to execute or to schedule the execution of ->runtime_suspend() + will cancel any pending requests to execute ->runtime_idle() for the same + device. + + * If ->runtime_resume() is about to be executed or there's a pending request + to execute it, the other callbacks will not be executed for the same device. + + * A request to execute ->runtime_resume() will cancel any pending or + scheduled requests to execute the other callbacks for the same device. + +3. Run-time PM Device Fields + +The following device run-time PM fields are present in 'struct dev_pm_info', as +defined in include/linux/pm.h: + + struct timer_list suspend_timer; + - timer used for scheduling (delayed) suspend request + + unsigned long timer_expires; + - timer expiration time, in jiffies (if this is different from zero, the + timer is running and will expire at that time, otherwise the timer is not + running) + + struct work_struct work; + - work structure used for queuing up requests (i.e. work items in pm_wq) + + wait_queue_head_t wait_queue; + - wait queue used if any of the helper functions needs to wait for another + one to complete + + spinlock_t lock; + - lock used for synchronisation + + atomic_t usage_count; + - the usage counter of the device + + atomic_t child_count; + - the count of 'active' children of the device + + unsigned int ignore_children; + - if set, the value of child_count is ignored (but still updated) + + unsigned int disable_depth; + - used for disabling the helper funcions (they work normally if this is + equal to zero); the initial value of it is 1 (i.e. run-time PM is + initially disabled for all devices) + + unsigned int runtime_error; + - if set, there was a fatal error (one of the callbacks returned error code + as described in Section 2), so the helper funtions will not work until + this flag is cleared; this is the error code returned by the failing + callback + + unsigned int idle_notification; + - if set, ->runtime_idle() is being executed + + unsigned int request_pending; + - if set, there's a pending request (i.e. a work item queued up into pm_wq) + + enum rpm_request request; + - type of request that's pending (valid if request_pending is set) + + unsigned int deferred_resume; + - set if ->runtime_resume() is about to be run while ->runtime_suspend() is + being executed for that device and it is not practical to wait for the + suspend to complete; means "start a resume as soon as you've suspended" + + enum rpm_status runtime_status; + - the run-time PM status of the device; this field's initial value is + RPM_SUSPENDED, which means that each device is initially regarded by the + PM core as 'suspended', regardless of its real hardware status + +All of the above fields are members of the 'power' member of 'struct device'. + +4. Run-time PM Device Helper Functions + +The following run-time PM helper functions are defined in +drivers/base/power/runtime.c and include/linux/pm_runtime.h: + + void pm_runtime_init(struct device *dev); + - initialize the device run-time PM fields in 'struct dev_pm_info' + + void pm_runtime_remove(struct device *dev); + - make sure that the run-time PM of the device will be disabled after + removing the device from device hierarchy + + int pm_runtime_idle(struct device *dev); + - execute ->runtime_idle() for the device's bus type; returns 0 on success + or error code on failure, where -EINPROGRESS means that ->runtime_idle() + is already being executed + + int pm_runtime_suspend(struct device *dev); + - execute ->runtime_suspend() for the device's bus type; returns 0 on + success, 1 if the device's run-time PM status was already 'suspended', or + error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt + to suspend the device again in future + + int pm_runtime_resume(struct device *dev); + - execute ->runtime_resume() for the device's bus type; returns 0 on + success, 1 if the device's run-time PM status was already 'active' or + error code on failure, where -EAGAIN means it may be safe to attempt to + resume the device again in future, but 'power.runtime_error' should be + checked additionally + + int pm_request_idle(struct device *dev); + - submit a request to execute ->runtime_idle() for the device's bus type + (the request is represented by a work item in pm_wq); returns 0 on success + or error code if the request has not been queued up + + int pm_schedule_suspend(struct device *dev, unsigned int delay); + - schedule the execution of ->runtime_suspend() for the device's bus type + in future, where 'delay' is the time to wait before queuing up a suspend + work item in pm_wq, in milliseconds (if 'delay' is zero, the work item is + queued up immediately); returns 0 on success, 1 if the device's PM + run-time status was already 'suspended', or error code if the request + hasn't been scheduled (or queued up if 'delay' is 0); if the execution of + ->runtime_suspend() is already scheduled and not yet expired, the new + value of 'delay' will be used as the time to wait + + int pm_request_resume(struct device *dev); + - submit a request to execute ->runtime_resume() for the device's bus type + (the request is represented by a work item in pm_wq); returns 0 on + success, 1 if the device's run-time PM status was already 'active', or + error code if the request hasn't been queued up + + void pm_runtime_get_noresume(struct device *dev); + - increment the device's usage counter + + int pm_runtime_get(struct device *dev); + - increment the device's usage counter, run pm_request_resume(dev) and + return its result + + int pm_runtime_get_sync(struct device *dev); + - increment the device's usage counter, run pm_runtime_resume(dev) and + return its result + + void pm_runtime_put_noidle(struct device *dev); + - decrement the device's usage counter + + int pm_runtime_put(struct device *dev); + - decrement the device's usage counter, run pm_request_idle(dev) and return + its result + + int pm_runtime_put_sync(struct device *dev); + - decrement the device's usage counter, run pm_runtime_idle(dev) and return + its result + + void pm_runtime_enable(struct device *dev); + - enable the run-time PM helper functions to run the device bus type's + run-time PM callbacks described in Section 2 + + int pm_runtime_disable(struct device *dev); + - prevent the run-time PM helper functions from running the device bus + type's run-time PM callbacks, make sure that all of the pending run-time + PM operations on the device are either completed or canceled; returns + 1 if there was a resume request pending and it was necessary to execute + ->runtime_resume() for the device's bus type to satisfy that request, + otherwise 0 is returned + + void pm_suspend_ignore_children(struct device *dev, bool enable); + - set/unset the power.ignore_children flag of the device + + int pm_runtime_set_active(struct device *dev); + - clear the device's 'power.runtime_error' flag, set the device's run-time + PM status to 'active' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero); it will fail and return error code if the device has a parent + which is not active and the 'power.ignore_children' flag of which is unset + + void pm_runtime_set_suspended(struct device *dev); + - clear the device's 'power.runtime_error' flag, set the device's run-time + PM status to 'suspended' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero) + +It is safe to execute the following helper functions from interrupt context: + +pm_request_idle() +pm_schedule_suspend() +pm_request_resume() +pm_runtime_get_noresume() +pm_runtime_get() +pm_runtime_put_noidle() +pm_runtime_put() +pm_suspend_ignore_children() +pm_runtime_set_active() +pm_runtime_set_suspended() +pm_runtime_enable() + +5. Run-time PM Initialization, Device Probing and Removal + +Initially, the run-time PM is disabled for all devices, which means that the +majority of the run-time PM helper funtions described in Section 4 will return +-EAGAIN until pm_runtime_enable() is called for the device. + +In addition to that, the initial run-time PM status of all devices is +'suspended', but it need not reflect the actual physical state of the device. +Thus, if the device is initially active (i.e. it is able to process I/O), its +run-time PM status must be changed to 'active', with the help of +pm_runtime_set_active(), before pm_runtime_enable() is called for the device. + +However, if the device has a parent and the parent's run-time PM is enabled, +calling pm_runtime_set_active() for the device will affect the parent, unless +the parent's 'power.ignore_children' flag is set. Namely, in that case the +parent won't be able to suspend at run time, using the PM core's helper +functions, as long as the child's status is 'active', even if the child's +run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for +the child yet or pm_runtime_disable() has been called for it). For this reason, +once pm_runtime_set_active() has been called for the device, pm_runtime_enable() +should be called for it too as soon as reasonably possible or its run-time PM +status should be changed back to 'suspended' with the help of +pm_runtime_set_suspended(). + +If the default initial run-time PM status of the device (i.e. 'suspended') +reflects the actual state of the device, its bus type's or its driver's +->probe() callback will likely need to wake it up using one of the PM core's +helper functions described in Section 4. In that case, pm_runtime_resume() +should be used. Of course, for this purpose the device's run-time PM has to be +enabled earlier by calling pm_runtime_enable(). + +If the device bus type's or driver's ->probe() or ->remove() callback runs +pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts, +they will fail returning -EAGAIN, because the device's usage counter is +incremented by the core before executing ->probe() and ->remove(). Still, it +may be desirable to suspend the device as soon as ->probe() or ->remove() has +finished, so the PM core uses pm_runtime_idle_sync() to invoke the device bus +type's ->runtime_idle() callback at that time. diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index c61642b4060..9f390ce335c 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -12,4 +12,7 @@ struct dev_archdata { #endif }; +struct pdev_archdata { +}; + #endif diff --git a/arch/arm/plat-omap/debug-leds.c b/arch/arm/plat-omap/debug-leds.c index be4eefda476..9395898dd49 100644 --- a/arch/arm/plat-omap/debug-leds.c +++ b/arch/arm/plat-omap/debug-leds.c @@ -281,24 +281,27 @@ static int /* __init */ fpga_probe(struct platform_device *pdev) return 0; } -static int fpga_suspend_late(struct platform_device *pdev, pm_message_t mesg) +static int fpga_suspend_noirq(struct device *dev) { __raw_writew(~0, &fpga->leds); return 0; } -static int fpga_resume_early(struct platform_device *pdev) +static int fpga_resume_noirq(struct device *dev) { __raw_writew(~hw_led_state, &fpga->leds); return 0; } +static struct dev_pm_ops fpga_dev_pm_ops = { + .suspend_noirq = fpga_suspend_noirq, + .resume_noirq = fpga_resume_noirq, +}; static struct platform_driver led_driver = { .driver.name = "omap_dbg_led", + .driver.pm = &fpga_dev_pm_ops, .probe = fpga_probe, - .suspend_late = fpga_suspend_late, - .resume_early = fpga_resume_early, }; static int __init fpga_init(void) diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index fd21937fe11..176c86e5531 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -1418,8 +1418,9 @@ static struct irq_chip mpuio_irq_chip = { #include <linux/platform_device.h> -static int omap_mpuio_suspend_late(struct platform_device *pdev, pm_message_t mesg) +static int omap_mpuio_suspend_noirq(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct gpio_bank *bank = platform_get_drvdata(pdev); void __iomem *mask_reg = bank->base + OMAP_MPUIO_GPIO_MASKIT; unsigned long flags; @@ -1432,8 +1433,9 @@ static int omap_mpuio_suspend_late(struct platform_device *pdev, pm_message_t me return 0; } -static int omap_mpuio_resume_early(struct platform_device *pdev) +static int omap_mpuio_resume_noirq(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct gpio_bank *bank = platform_get_drvdata(pdev); void __iomem *mask_reg = bank->base + OMAP_MPUIO_GPIO_MASKIT; unsigned long flags; @@ -1445,14 +1447,18 @@ static int omap_mpuio_resume_early(struct platform_device *pdev) return 0; } +static struct dev_pm_ops omap_mpuio_dev_pm_ops = { + .suspend_noirq = omap_mpuio_suspend_noirq, + .resume_noirq = omap_mpuio_resume_noirq, +}; + /* use platform_driver for this, now that there's no longer any * point to sys_device (other than not disturbing old code). */ static struct platform_driver omap_mpuio_driver = { - .suspend_late = omap_mpuio_suspend_late, - .resume_early = omap_mpuio_resume_early, .driver = { .name = "mpuio", + .pm = &omap_mpuio_dev_pm_ops, }, }; diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index 41ab85d66f3..d66d446b127 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -15,4 +15,7 @@ struct dev_archdata { #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_IA64_DEVICE_H */ diff --git a/arch/microblaze/include/asm/device.h b/arch/microblaze/include/asm/device.h index c042830793e..30286db27c1 100644 --- a/arch/microblaze/include/asm/device.h +++ b/arch/microblaze/include/asm/device.h @@ -16,6 +16,9 @@ struct dev_archdata { struct device_node *of_node; }; +struct pdev_archdata { +}; + #endif /* _ASM_MICROBLAZE_DEVICE_H */ diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 7d2277cef09..e3e06e0f7fc 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -30,4 +30,7 @@ dev_archdata_get_node(const struct dev_archdata *ad) return ad->of_node; } +struct pdev_archdata { +}; + #endif /* _ASM_POWERPC_DEVICE_H */ diff --git a/arch/sparc/include/asm/device.h b/arch/sparc/include/asm/device.h index 3702e087df2..f3b85b6b0b7 100644 --- a/arch/sparc/include/asm/device.h +++ b/arch/sparc/include/asm/device.h @@ -32,4 +32,7 @@ dev_archdata_get_node(const struct dev_archdata *ad) return ad->prom_node; } +struct pdev_archdata { +}; + #endif /* _ASM_SPARC_DEVICE_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 4994a20acbc..cee34e9ca45 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -13,4 +13,7 @@ struct dma_map_ops *dma_ops; #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_X86_DEVICE_H */ diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0106875f01..7b34b3a48f6 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -23,6 +23,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/async.h> +#include <linux/pm_runtime.h> #include "base.h" #include "power/power.h" @@ -202,7 +203,10 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); ret = really_probe(dev, drv); + pm_runtime_put_sync(dev); return ret; } @@ -245,7 +249,9 @@ int device_attach(struct device *dev) ret = 0; } } else { + pm_runtime_get_noresume(dev); ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); + pm_runtime_put_sync(dev); } up(&dev->sem); return ret; @@ -306,6 +312,9 @@ static void __device_release_driver(struct device *dev) drv = dev->driver; if (drv) { + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); + driver_sysfs_remove(dev); if (dev->bus) @@ -324,6 +333,8 @@ static void __device_release_driver(struct device *dev) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_UNBOUND_DRIVER, dev); + + pm_runtime_put_sync(dev); } } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 0b111e8e444..0f7d434ce98 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -17,6 +17,7 @@ #include <linux/bootmem.h> #include <linux/err.h> #include <linux/slab.h> +#include <linux/pm_runtime.h> #include "base.h" @@ -625,30 +626,6 @@ static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) return ret; } -static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg) -{ - struct platform_driver *pdrv = to_platform_driver(dev->driver); - struct platform_device *pdev = to_platform_device(dev); - int ret = 0; - - if (dev->driver && pdrv->suspend_late) - ret = pdrv->suspend_late(pdev, mesg); - - return ret; -} - -static int platform_legacy_resume_early(struct device *dev) -{ - struct platform_driver *pdrv = to_platform_driver(dev->driver); - struct platform_device *pdev = to_platform_device(dev); - int ret = 0; - - if (dev->driver && pdrv->resume_early) - ret = pdrv->resume_early(pdev); - - return ret; -} - static int platform_legacy_resume(struct device *dev) { struct platform_driver *pdrv = to_platform_driver(dev->driver); @@ -680,6 +657,13 @@ static void platform_pm_complete(struct device *dev) drv->pm->complete(dev); } +#else /* !CONFIG_PM_SLEEP */ + +#define platform_pm_prepare NULL +#define platform_pm_complete NULL + +#endif /* !CONFIG_PM_SLEEP */ + #ifdef CONFIG_SUSPEND static int platform_pm_suspend(struct device *dev) @@ -711,8 +695,6 @@ static int platform_pm_suspend_noirq(struct device *dev) if (drv->pm) { if (drv->pm->suspend_noirq) ret = drv->pm->suspend_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND); } return ret; @@ -747,8 +729,6 @@ static int platform_pm_resume_noirq(struct device *dev) if (drv->pm) { if (drv->pm->resume_noirq) ret = drv->pm->resume_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; @@ -794,8 +774,6 @@ static int platform_pm_freeze_noirq(struct device *dev) if (drv->pm) { if (drv->pm->freeze_noirq) ret = drv->pm->freeze_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_FREEZE); } return ret; @@ -830,8 +808,6 @@ static int platform_pm_thaw_noirq(struct device *dev) if (drv->pm) { if (drv->pm->thaw_noirq) ret = drv->pm->thaw_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; @@ -866,8 +842,6 @@ static int platform_pm_poweroff_noirq(struct device *dev) if (drv->pm) { if (drv->pm->poweroff_noirq) ret = drv->pm->poweroff_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE); } return ret; @@ -902,8 +876,6 @@ static int platform_pm_restore_noirq(struct device *dev) if (drv->pm) { if (drv->pm->restore_noirq) ret = drv->pm->restore_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; @@ -922,6 +894,31 @@ static int platform_pm_restore_noirq(struct device *dev) #endif /* !CONFIG_HIBERNATION */ +#ifdef CONFIG_PM_RUNTIME + +int __weak platform_pm_runtime_suspend(struct device *dev) +{ + return -ENOSYS; +}; + +int __weak platform_pm_runtime_resume(struct device *dev) +{ + return -ENOSYS; +}; + +int __weak platform_pm_runtime_idle(struct device *dev) +{ + return -ENOSYS; +}; + +#else /* !CONFIG_PM_RUNTIME */ + +#define platform_pm_runtime_suspend NULL +#define platform_pm_runtime_resume NULL +#define platform_pm_runtime_idle NULL + +#endif /* !CONFIG_PM_RUNTIME */ + static const struct dev_pm_ops platform_dev_pm_ops = { .prepare = platform_pm_prepare, .complete = platform_pm_complete, @@ -937,22 +934,17 @@ static const struct dev_pm_ops platform_dev_pm_ops = { .thaw_noirq = platform_pm_thaw_noirq, .poweroff_noirq = platform_pm_poweroff_noirq, .restore_noirq = platform_pm_restore_noirq, + .runtime_suspend = platform_pm_runtime_suspend, + .runtime_resume = platform_pm_runtime_resume, + .runtime_idle = platform_pm_runtime_idle, }; -#define PLATFORM_PM_OPS_PTR (&platform_dev_pm_ops) - -#else /* !CONFIG_PM_SLEEP */ - -#define PLATFORM_PM_OPS_PTR NULL - -#endif /* !CONFIG_PM_SLEEP */ - struct bus_type platform_bus_type = { .name = "platform", .dev_attrs = platform_dev_attrs, .match = platform_match, .uevent = platform_uevent, - .pm = PLATFORM_PM_OPS_PTR, + .pm = &platform_dev_pm_ops, }; EXPORT_SYMBOL_GPL(platform_bus_type); diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 911208b8925..3ce3519e8f3 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_PM) += sysfs.o obj-$(CONFIG_PM_SLEEP) += main.o +obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1b1a786b7de..e0dc4071e08 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -21,6 +21,7 @@ #include <linux/kallsyms.h> #include <linux/mutex.h> #include <linux/pm.h> +#include <linux/pm_runtime.h> #include <linux/resume-trace.h> #include <linux/rwsem.h> #include <linux/interrupt.h> @@ -49,7 +50,17 @@ static DEFINE_MUTEX(dpm_list_mtx); static bool transition_started; /** - * device_pm_lock - lock the list of active devices used by the PM core + * device_pm_init - Initialize the PM-related part of a device object. + * @dev: Device object being initialized. + */ +void device_pm_init(struct device *dev) +{ + dev->power.status = DPM_ON; + pm_runtime_init(dev); +} + +/** + * device_pm_lock - Lock the list of active devices used by the PM core. */ void device_pm_lock(void) { @@ -57,7 +68,7 @@ void device_pm_lock(void) } /** - * device_pm_unlock - unlock the list of active devices used by the PM core + * device_pm_unlock - Unlock the list of active devices used by the PM core. */ void device_pm_unlock(void) { @@ -65,8 +76,8 @@ void device_pm_unlock(void) } /** - * device_pm_add - add a device to the list of active devices - * @dev: Device to be added to the list + * device_pm_add - Add a device to the PM core's list of active devices. + * @dev: Device to add to the list. */ void device_pm_add(struct device *dev) { @@ -92,10 +103,8 @@ void device_pm_add(struct device *dev) } /** - * device_pm_remove - remove a device from the list of active devices - * @dev: Device to be removed from the list - * - * This function also removes the device's PM-related sysfs attributes. + * device_pm_remove - Remove a device from the PM core's list of active devices. + * @dev: Device to be removed from the list. */ void device_pm_remove(struct device *dev) { @@ -105,12 +114,13 @@ void device_pm_remove(struct device *dev) mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); + pm_runtime_remove(dev); } /** - * device_pm_move_before - move device in dpm_list - * @deva: Device to move in dpm_list - * @devb: Device @deva should come before + * device_pm_move_before - Move device in the PM core's list of active devices. + * @deva: Device to move in dpm_list. + * @devb: Device @deva should come before. */ void device_pm_move_before(struct device *deva, struct device *devb) { @@ -124,9 +134,9 @@ void device_pm_move_before(struct device *deva, struct device *devb) } /** - * device_pm_move_after - move device in dpm_list - * @deva: Device to move in dpm_list - * @devb: Device @deva should come after + * device_pm_move_after - Move device in the PM core's list of active devices. + * @deva: Device to move in dpm_list. + * @devb: Device @deva should come after. */ void device_pm_move_after(struct device *deva, struct device *devb) { @@ -140,8 +150,8 @@ void device_pm_move_after(struct device *deva, struct device *devb) } /** - * device_pm_move_last - move device to end of dpm_list - * @dev: Device to move in dpm_list + * device_pm_move_last - Move device to end of the PM core's list of devices. + * @dev: Device to move in dpm_list. */ void device_pm_move_last(struct device *dev) { @@ -152,10 +162,10 @@ void device_pm_move_last(struct device *dev) } /** - * pm_op - execute the PM operation appropiate for given PM event - * @dev: Device. - * @ops: PM operations to choose from. - * @state: PM transition of the system being carried out. + * pm_op - Execute the PM operation appropriate for given PM event. + * @dev: Device to handle. + * @ops: PM operations to choose from. + * @state: PM transition of the system being carried out. */ static int pm_op(struct device *dev, const struct dev_pm_ops *ops, @@ -213,13 +223,13 @@ static int pm_op(struct device *dev, } /** - * pm_noirq_op - execute the PM operation appropiate for given PM event - * @dev: Device. - * @ops: PM operations to choose from. - * @state: PM transition of the system being carried out. + * pm_noirq_op - Execute the PM operation appropriate for given PM event. + * @dev: Device to handle. + * @ops: PM operations to choose from. + * @state: PM transition of the system being carried out. * - * The operation is executed with interrupts disabled by the only remaining - * functional CPU in the system. + * The driver of @dev will not receive interrupts while this function is being + * executed. */ static int pm_noirq_op(struct device *dev, const struct dev_pm_ops *ops, @@ -317,11 +327,12 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info, /*------------------------- Resume routines -------------------------*/ /** - * device_resume_noirq - Power on one device (early resume). - * @dev: Device. - * @state: PM transition of the system being carried out. + * device_resume_noirq - Execute an "early resume" callback for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. * - * Must be called with interrupts disabled. + * The driver of @dev will not receive interrupts while this function is being + * executed. */ static int device_resume_noirq(struct device *dev, pm_message_t state) { @@ -343,20 +354,18 @@ static int device_resume_noirq(struct device *dev, pm_message_t state) } /** - * dpm_resume_noirq - Power on all regular (non-sysdev) devices. - * @state: PM transition of the system being carried out. - * - * Call the "noirq" resume handlers for all devices marked as - * DPM_OFF_IRQ and enable device drivers to receive interrupts. + * dpm_resume_noirq - Execute "early resume" callbacks for non-sysdev devices. + * @state: PM transition of the system being carried out. * - * Must be called under dpm_list_mtx. Device drivers should not receive - * interrupts while it's being executed. + * Call the "noirq" resume handlers for all devices marked as DPM_OFF_IRQ and + * enable device drivers to receive interrupts. */ void dpm_resume_noirq(pm_message_t state) { struct device *dev; mutex_lock(&dpm_list_mtx); + transition_started = false; list_for_each_entry(dev, &dpm_list, power.entry) if (dev->power.status > DPM_OFF) { int error; @@ -372,9 +381,9 @@ void dpm_resume_noirq(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_noirq); /** - * device_resume - Restore state for one device. - * @dev: Device. - * @state: PM transition of the system being carried out. + * device_resume - Execute "resume" callbacks for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. */ static int device_resume(struct device *dev, pm_message_t state) { @@ -423,11 +432,11 @@ static int device_resume(struct device *dev, pm_message_t state) } /** - * dpm_resume - Resume every device. - * @state: PM transition of the system being carried out. + * dpm_resume - Execute "resume" callbacks for non-sysdev devices. + * @state: PM transition of the system being carried out. * - * Execute the appropriate "resume" callback for all devices the status of - * which indicates that they are inactive. + * Execute the appropriate "resume" callback for all devices whose status + * indicates that they are suspended. */ static void dpm_resume(pm_message_t state) { @@ -435,7 +444,6 @@ static void dpm_resume(pm_message_t state) INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); - transition_started = false; while (!list_empty(&dpm_list)) { struct device *dev = to_device(dpm_list.next); @@ -464,9 +472,9 @@ static void dpm_resume(pm_message_t state) } /** - * device_complete - Complete a PM transition for given device - * @dev: Device. - * @state: PM transition of the system being carried out. + * device_complete - Complete a PM transition for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. */ static void device_complete(struct device *dev, pm_message_t state) { @@ -491,11 +499,11 @@ static void device_complete(struct device *dev, pm_message_t state) } /** - * dpm_complete - Complete a PM transition for all devices. - * @state: PM transition of the system being carried out. + * dpm_complete - Complete a PM transition for all non-sysdev devices. + * @state: PM transition of the system being carried out. * - * Execute the ->complete() callbacks for all devices that are not marked - * as DPM_ON. + * Execute the ->complete() callbacks for all devices whose PM status is not + * DPM_ON (this allows new devices to be registered). */ static void dpm_complete(pm_message_t state) { @@ -512,6 +520,7 @@ static void dpm_complete(pm_message_t state) mutex_unlock(&dpm_list_mtx); device_complete(dev, state); + pm_runtime_put_noidle(dev); mutex_lock(&dpm_list_mtx); } @@ -524,11 +533,11 @@ static void dpm_complete(pm_message_t state) } /** - * dpm_resume_end - Restore state of each device in system. - * @state: PM transition of the system being carried out. + * dpm_resume_end - Execute "resume" callbacks and complete system transition. + * @state: PM transition of the system being carried out. * - * Resume all the devices, unlock them all, and allow new - * devices to be registered once again. + * Execute "resume" callbacks for all devices and complete the PM transition of + * the system. */ void dpm_resume_end(pm_message_t state) { @@ -542,9 +551,11 @@ EXPORT_SYMBOL_GPL(dpm_resume_end); /*------------------------- Suspend routines -------------------------*/ /** - * resume_event - return a PM message representing the resume event - * corresponding to given sleep state. - * @sleep_state: PM message representing a sleep state. + * resume_event - Return a "resume" message for given "suspend" sleep state. + * @sleep_state: PM message representing a sleep state. + * + * Return a PM message representing the resume event corresponding to given + * sleep state. */ static pm_message_t resume_event(pm_message_t sleep_state) { @@ -561,11 +572,12 @@ static pm_message_t resume_event(pm_message_t sleep_state) } /** - * device_suspend_noirq - Shut down one device (late suspend). - * @dev: Device. - * @state: PM transition of the system being carried out. + * device_suspend_noirq - Execute a "late suspend" callback for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. * - * This is called with interrupts off and only a single CPU running. + * The driver of @dev will not receive interrupts while this function is being + * executed. */ static int device_suspend_noirq(struct device *dev, pm_message_t state) { @@ -582,13 +594,11 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) } /** - * dpm_suspend_noirq - Power down all regular (non-sysdev) devices. - * @state: PM transition of the system being carried out. - * - * Prevent device drivers from receiving interrupts and call the "noirq" - * suspend handlers. + * dpm_suspend_noirq - Execute "late suspend" callbacks for non-sysdev devices. + * @state: PM transition of the system being carried out. * - * Must be called under dpm_list_mtx. + * Prevent device drivers from receiving interrupts and call the "noirq" suspend + * handlers for all non-sysdev devices. */ int dpm_suspend_noirq(pm_message_t state) { @@ -613,9 +623,9 @@ int dpm_suspend_noirq(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_suspend_noirq); /** - * device_suspend - Save state of one device. - * @dev: Device. - * @state: PM transition of the system being carried out. + * device_suspend - Execute "suspend" callbacks for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. */ static int device_suspend(struct device *dev, pm_message_t state) { @@ -662,10 +672,8 @@ static int device_suspend(struct device *dev, pm_message_t state) } /** - * dpm_suspend - Suspend every device. - * @state: PM transition of the system being carried out. - * - * Execute the appropriate "suspend" callbacks for all devices. + * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices. + * @state: PM transition of the system being carried out. */ static int dpm_suspend(pm_message_t state) { @@ -699,9 +707,12 @@ static int dpm_suspend(pm_message_t state) } /** - * device_prepare - Execute the ->prepare() callback(s) for given device. - * @dev: Device. - * @state: PM transition of the system being carried out. + * device_prepare - Prepare a device for system power transition. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. + * + * Execute the ->prepare() callback(s) for given device. No new children of the + * device may be registered after this function has returned. */ static int device_prepare(struct device *dev, pm_message_t state) { @@ -737,10 +748,10 @@ static int device_prepare(struct device *dev, pm_message_t state) } /** - * dpm_prepare - Prepare all devices for a PM transition. - * @state: PM transition of the system being carried out. + * dpm_prepare - Prepare all non-sysdev devices for a system PM transition. + * @state: PM transition of the system being carried out. * - * Execute the ->prepare() callback for all devices. + * Execute the ->prepare() callback(s) for all devices. */ static int dpm_prepare(pm_message_t state) { @@ -757,7 +768,14 @@ static int dpm_prepare(pm_message_t state) dev->power.status = DPM_PREPARING; mutex_unlock(&dpm_list_mtx); - error = device_prepare(dev, state); + pm_runtime_get_noresume(dev); + if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) { + /* Wake-up requested during system sleep transition. */ + pm_runtime_put_noidle(dev); + error = -EBUSY; + } else { + error = device_prepare(dev, state); + } mutex_lock(&dpm_list_mtx); if (error) { @@ -784,10 +802,11 @@ static int dpm_prepare(pm_message_t state) } /** - * dpm_suspend_start - Save state and stop all devices in system. - * @state: PM transition of the system being carried out. + * dpm_suspend_start - Prepare devices for PM transition and suspend them. + * @state: PM transition of the system being carried out. * - * Prepare and suspend all devices. + * Prepare all non-sysdev devices for system PM transition and execute "suspend" + * callbacks for them. */ int dpm_suspend_start(pm_message_t state) { diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index c7cb4fc3735..b8fa1aa5225 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -1,7 +1,14 @@ -static inline void device_pm_init(struct device *dev) -{ - dev->power.status = DPM_ON; -} +#ifdef CONFIG_PM_RUNTIME + +extern void pm_runtime_init(struct device *dev); +extern void pm_runtime_remove(struct device *dev); + +#else /* !CONFIG_PM_RUNTIME */ + +static inline void pm_runtime_init(struct device *dev) {} +static inline void pm_runtime_remove(struct device *dev) {} + +#endif /* !CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP @@ -16,23 +23,33 @@ static inline struct device *to_device(struct list_head *entry) return container_of(entry, struct device, power.entry); } +extern void device_pm_init(struct device *dev); extern void device_pm_add(struct device *); extern void device_pm_remove(struct device *); extern void device_pm_move_before(struct device *, struct device *); extern void device_pm_move_after(struct device *, struct device *); extern void device_pm_move_last(struct device *); -#else /* CONFIG_PM_SLEEP */ +#else /* !CONFIG_PM_SLEEP */ + +static inline void device_pm_init(struct device *dev) +{ + pm_runtime_init(dev); +} + +static inline void device_pm_remove(struct device *dev) +{ + pm_runtime_remove(dev); +} static inline void device_pm_add(struct device *dev) {} -static inline void device_pm_remove(struct device *dev) {} static inline void device_pm_move_before(struct device *deva, struct device *devb) {} static inline void device_pm_move_after(struct device *deva, struct device *devb) {} static inline void device_pm_move_last(struct device *dev) {} -#endif +#endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c new file mode 100644 index 00000000000..38556f6cc22 --- /dev/null +++ b/drivers/base/power/runtime.c @@ -0,0 +1,1011 @@ +/* + * drivers/base/power/runtime.c - Helper functions for device run-time PM + * + * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include <linux/sched.h> +#include <linux/pm_runtime.h> +#include <linux/jiffies.h> + +static int __pm_runtime_resume(struct device *dev, bool from_wq); +static int __pm_request_idle(struct device *dev); +static int __pm_request_resume(struct device *dev); + +/** + * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. + * @dev: Device to handle. + */ +static void pm_runtime_deactivate_timer(struct device *dev) +{ + if (dev->power.timer_expires > 0) { + del_timer(&dev->power.suspend_timer); + dev->power.timer_expires = 0; + } +} + +/** + * pm_runtime_cancel_pending - Deactivate suspend timer and cancel requests. + * @dev: Device to handle. + */ +static void pm_runtime_cancel_pending(struct device *dev) +{ + pm_runtime_deactivate_timer(dev); + /* + * In case there's a request pending, make sure its work function will + * return without doing anything. + */ + dev->power.request = RPM_REQ_NONE; +} + +/** + * __pm_runtime_idle - Notify device bus type if the device can be suspended. + * @dev: Device to notify the bus type about. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_runtime_idle(struct device *dev) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + int retval = 0; + + dev_dbg(dev, "__pm_runtime_idle()!\n"); + + if (dev->power.runtime_error) + retval = -EINVAL; + else if (dev->power.idle_notification) + retval = -EINPROGRESS; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0 + || dev->power.runtime_status != RPM_ACTIVE) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + if (dev->power.request_pending) { + /* + * If an idle notification request is pending, cancel it. Any + * other pending request takes precedence over us. + */ + if (dev->power.request == RPM_REQ_IDLE) { + dev->power.request = RPM_REQ_NONE; + } else if (dev->power.request != RPM_REQ_NONE) { + retval = -EAGAIN; + goto out; + } + } + + dev->power.idle_notification = true; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) { + spin_unlock_irq(&dev->power.lock); + + dev->bus->pm->runtime_idle(dev); + + spin_lock_irq(&dev->power.lock); + } + + dev->power.idle_notification = false; + wake_up_all(&dev->power.wait_queue); + + out: + dev_dbg(dev, "__pm_runtime_idle() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_idle - Notify device bus type if the device can be suspended. + * @dev: Device to notify the bus type about. + */ +int pm_runtime_idle(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_idle(dev); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_idle); + +/** + * __pm_runtime_suspend - Carry out run-time suspend of given device. + * @dev: Device to suspend. + * @from_wq: If set, the function has been called via pm_wq. + * + * Check if the device can be suspended and run the ->runtime_suspend() callback + * provided by its bus type. If another suspend has been started earlier, wait + * for it to finish. If an idle notification or suspend request is pending or + * scheduled, cancel it. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +int __pm_runtime_suspend(struct device *dev, bool from_wq) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + struct device *parent = NULL; + bool notify = false; + int retval = 0; + + dev_dbg(dev, "__pm_runtime_suspend()%s!\n", + from_wq ? " from workqueue" : ""); + + repeat: + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + /* Pending resume requests take precedence over us. */ + if (dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME) { + retval = -EAGAIN; + goto out; + } + + /* Other scheduled or pending requests need to be canceled. */ + pm_runtime_cancel_pending(dev); + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (dev->power.runtime_status == RPM_RESUMING + || dev->power.disable_depth > 0 + || atomic_read(&dev->power.usage_count) > 0) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + if (dev->power.runtime_status == RPM_SUSPENDING) { + DEFINE_WAIT(wait); + + if (from_wq) { + retval = -EINPROGRESS; + goto out; + } + + /* Wait for the other suspend running in parallel with us. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_SUSPENDING) + break; + + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + goto repeat; + } + + dev->power.runtime_status = RPM_SUSPENDING; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->bus->pm->runtime_suspend(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else { + retval = -ENOSYS; + } + + if (retval) { + dev->power.runtime_status = RPM_ACTIVE; + pm_runtime_cancel_pending(dev); + dev->power.deferred_resume = false; + + if (retval == -EAGAIN || retval == -EBUSY) { + notify = true; + dev->power.runtime_error = 0; + } + } else { + dev->power.runtime_status = RPM_SUSPENDED; + + if (dev->parent) { + parent = dev->parent; + atomic_add_unless(&parent->power.child_count, -1, 0); + } + } + wake_up_all(&dev->power.wait_queue); + + if (dev->power.deferred_resume) { + dev->power.deferred_resume = false; + __pm_runtime_resume(dev, false); + retval = -EAGAIN; + goto out; + } + + if (notify) + __pm_runtime_idle(dev); + + if (parent && !parent->power.ignore_children) { + spin_unlock_irq(&dev->power.lock); + + pm_request_idle(parent); + + spin_lock_irq(&dev->power.lock); + } + + out: + dev_dbg(dev, "__pm_runtime_suspend() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_suspend - Carry out run-time suspend of given device. + * @dev: Device to suspend. + */ +int pm_runtime_suspend(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_suspend(dev, false); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_suspend); + +/** + * __pm_runtime_resume - Carry out run-time resume of given device. + * @dev: Device to resume. + * @from_wq: If set, the function has been called via pm_wq. + * + * Check if the device can be woken up and run the ->runtime_resume() callback + * provided by its bus type. If another resume has been started earlier, wait + * for it to finish. If there's a suspend running in parallel with this + * function, wait for it to finish and resume the device. Cancel any scheduled + * or pending requests. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +int __pm_runtime_resume(struct device *dev, bool from_wq) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + struct device *parent = NULL; + int retval = 0; + + dev_dbg(dev, "__pm_runtime_resume()%s!\n", + from_wq ? " from workqueue" : ""); + + repeat: + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + pm_runtime_cancel_pending(dev); + + if (dev->power.runtime_status == RPM_ACTIVE) + retval = 1; + else if (dev->power.disable_depth > 0) + retval = -EAGAIN; + if (retval) + goto out; + + if (dev->power.runtime_status == RPM_RESUMING + || dev->power.runtime_status == RPM_SUSPENDING) { + DEFINE_WAIT(wait); + + if (from_wq) { + if (dev->power.runtime_status == RPM_SUSPENDING) + dev->power.deferred_resume = true; + retval = -EINPROGRESS; + goto out; + } + + /* Wait for the operation carried out in parallel with us. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_RESUMING + && dev->power.runtime_status != RPM_SUSPENDING) + break; + + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + goto repeat; + } + + if (!parent && dev->parent) { + /* + * Increment the parent's resume counter and resume it if + * necessary. + */ + parent = dev->parent; + spin_unlock_irq(&dev->power.lock); + + pm_runtime_get_noresume(parent); + + spin_lock_irq(&parent->power.lock); + /* + * We can resume if the parent's run-time PM is disabled or it + * is set to ignore children. + */ + if (!parent->power.disable_depth + && !parent->power.ignore_children) { + __pm_runtime_resume(parent, false); + if (parent->power.runtime_status != RPM_ACTIVE) + retval = -EBUSY; + } + spin_unlock_irq(&parent->power.lock); + + spin_lock_irq(&dev->power.lock); + if (retval) + goto out; + goto repeat; + } + + dev->power.runtime_status = RPM_RESUMING; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->bus->pm->runtime_resume(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else { + retval = -ENOSYS; + } + + if (retval) { + dev->power.runtime_status = RPM_SUSPENDED; + pm_runtime_cancel_pending(dev); + } else { + dev->power.runtime_status = RPM_ACTIVE; + if (parent) + atomic_inc(&parent->power.child_count); + } + wake_up_all(&dev->power.wait_queue); + + if (!retval) + __pm_request_idle(dev); + + out: + if (parent) { + spin_unlock_irq(&dev->power.lock); + + pm_runtime_put(parent); + + spin_lock_irq(&dev->power.lock); + } + + dev_dbg(dev, "__pm_runtime_resume() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_resume - Carry out run-time resume of given device. + * @dev: Device to suspend. + */ +int pm_runtime_resume(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_resume(dev, false); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_resume); + +/** + * pm_runtime_work - Universal run-time PM work function. + * @work: Work structure used for scheduling the execution of this function. + * + * Use @work to get the device object the work is to be done for, determine what + * is to be done and execute the appropriate run-time PM function. + */ +static void pm_runtime_work(struct work_struct *work) +{ + struct device *dev = container_of(work, struct device, power.work); + enum rpm_request req; + + spin_lock_irq(&dev->power.lock); + + if (!dev->power.request_pending) + goto out; + + req = dev->power.request; + dev->power.request = RPM_REQ_NONE; + dev->power.request_pending = false; + + switch (req) { + case RPM_REQ_NONE: + break; + case RPM_REQ_IDLE: + __pm_runtime_idle(dev); + break; + case RPM_REQ_SUSPEND: + __pm_runtime_suspend(dev, true); + break; + case RPM_REQ_RESUME: + __pm_runtime_resume(dev, true); + break; + } + + out: + spin_unlock_irq(&dev->power.lock); +} + +/** + * __pm_request_idle - Submit an idle notification request for given device. + * @dev: Device to handle. + * + * Check if the device's run-time PM status is correct for suspending the device + * and queue up a request to run __pm_runtime_idle() for it. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_idle(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + retval = -EINVAL; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0 + || dev->power.runtime_status == RPM_SUSPENDED + || dev->power.runtime_status == RPM_SUSPENDING) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + return retval; + + if (dev->power.request_pending) { + /* Any requests other then RPM_REQ_IDLE take precedence. */ + if (dev->power.request == RPM_REQ_NONE) + dev->power.request = RPM_REQ_IDLE; + else if (dev->power.request != RPM_REQ_IDLE) + retval = -EAGAIN; + return retval; + } + + dev->power.request = RPM_REQ_IDLE; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return retval; +} + +/** + * pm_request_idle - Submit an idle notification request for given device. + * @dev: Device to handle. + */ +int pm_request_idle(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = __pm_request_idle(dev); + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_request_idle); + +/** + * __pm_request_suspend - Submit a suspend request for given device. + * @dev: Device to suspend. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_suspend(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + return -EINVAL; + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0) + retval = -EAGAIN; + else if (dev->power.runtime_status == RPM_SUSPENDING) + retval = -EINPROGRESS; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval < 0) + return retval; + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* + * Pending resume requests take precedence over us, but we can + * overtake any other pending request. + */ + if (dev->power.request == RPM_REQ_RESUME) + retval = -EAGAIN; + else if (dev->power.request != RPM_REQ_SUSPEND) + dev->power.request = retval ? + RPM_REQ_NONE : RPM_REQ_SUSPEND; + return retval; + } else if (retval) { + return retval; + } + + dev->power.request = RPM_REQ_SUSPEND; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return 0; +} + +/** + * pm_suspend_timer_fn - Timer function for pm_schedule_suspend(). + * @data: Device pointer passed by pm_schedule_suspend(). + * + * Check if the time is right and execute __pm_request_suspend() in that case. + */ +static void pm_suspend_timer_fn(unsigned long data) +{ + struct device *dev = (struct device *)data; + unsigned long flags; + unsigned long expires; + + spin_lock_irqsave(&dev->power.lock, flags); + + expires = dev->power.timer_expires; + /* If 'expire' is after 'jiffies' we've been called too early. */ + if (expires > 0 && !time_after(expires, jiffies)) { + dev->power.timer_expires = 0; + __pm_request_suspend(dev); + } + + spin_unlock_irqrestore(&dev->power.lock, flags); +} + +/** + * pm_schedule_suspend - Set up a timer to submit a suspend request in future. + * @dev: Device to suspend. + * @delay: Time to wait before submitting a suspend request, in milliseconds. + */ +int pm_schedule_suspend(struct device *dev, unsigned int delay) +{ + unsigned long flags; + int retval = 0; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + if (!delay) { + retval = __pm_request_suspend(dev); + goto out; + } + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* + * Pending resume requests take precedence over us, but any + * other pending requests have to be canceled. + */ + if (dev->power.request == RPM_REQ_RESUME) { + retval = -EAGAIN; + goto out; + } + dev->power.request = RPM_REQ_NONE; + } + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (dev->power.runtime_status == RPM_SUSPENDING) + retval = -EINPROGRESS; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + dev->power.timer_expires = jiffies + msecs_to_jiffies(delay); + mod_timer(&dev->power.suspend_timer, dev->power.timer_expires); + + out: + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_schedule_suspend); + +/** + * pm_request_resume - Submit a resume request for given device. + * @dev: Device to resume. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_resume(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + return -EINVAL; + + if (dev->power.runtime_status == RPM_ACTIVE) + retval = 1; + else if (dev->power.runtime_status == RPM_RESUMING) + retval = -EINPROGRESS; + else if (dev->power.disable_depth > 0) + retval = -EAGAIN; + if (retval < 0) + return retval; + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* If non-resume request is pending, we can overtake it. */ + dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME; + return retval; + } else if (retval) { + return retval; + } + + dev->power.request = RPM_REQ_RESUME; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return retval; +} + +/** + * pm_request_resume - Submit a resume request for given device. + * @dev: Device to resume. + */ +int pm_request_resume(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = __pm_request_resume(dev); + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_request_resume); + +/** + * __pm_runtime_get - Reference count a device and wake it up, if necessary. + * @dev: Device to handle. + * @sync: If set and the device is suspended, resume it synchronously. + * + * Increment the usage count of the device and if it was zero previously, + * resume it or submit a resume request for it, depending on the value of @sync. + */ +int __pm_runtime_get(struct device *dev, bool sync) +{ + int retval = 1; + + if (atomic_add_return(1, &dev->power.usage_count) == 1) + retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(__pm_runtime_get); + +/** + * __pm_runtime_put - Decrement the device's usage counter and notify its bus. + * @dev: Device to handle. + * @sync: If the device's bus type is to be notified, do that synchronously. + * + * Decrement the usage count of the device and if it reaches zero, carry out a + * synchronous idle notification or submit an idle notification request for it, + * depending on the value of @sync. + */ +int __pm_runtime_put(struct device *dev, bool sync) +{ + int retval = 0; + + if (atomic_dec_and_test(&dev->power.usage_count)) + retval = sync ? pm_runtime_idle(dev) : pm_request_idle(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(__pm_runtime_put); + +/** + * __pm_runtime_set_status - Set run-time PM status of a device. + * @dev: Device to handle. + * @status: New run-time PM status of the device. + * + * If run-time PM of the device is disabled or its power.runtime_error field is + * different from zero, the status may be changed either to RPM_ACTIVE, or to + * RPM_SUSPENDED, as long as that reflects the actual state of the device. + * However, if the device has a parent and the parent is not active, and the + * parent's power.ignore_children flag is unset, the device's status cannot be + * set to RPM_ACTIVE, so -EBUSY is returned in that case. + * + * If successful, __pm_runtime_set_status() clears the power.runtime_error field + * and the device parent's counter of unsuspended children is modified to + * reflect the new status. If the new status is RPM_SUSPENDED, an idle + * notification request for the parent is submitted. + */ +int __pm_runtime_set_status(struct device *dev, unsigned int status) +{ + struct device *parent = dev->parent; + unsigned long flags; + bool notify_parent = false; + int error = 0; + + if (status != RPM_ACTIVE && status != RPM_SUSPENDED) + return -EINVAL; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (!dev->power.runtime_error && !dev->power.disable_depth) { + error = -EAGAIN; + goto out; + } + + if (dev->power.runtime_status == status) + goto out_set; + + if (status == RPM_SUSPENDED) { + /* It always is possible to set the status to 'suspended'. */ + if (parent) { + atomic_add_unless(&parent->power.child_count, -1, 0); + notify_parent = !parent->power.ignore_children; + } + goto out_set; + } + + if (parent) { + spin_lock_irq(&parent->power.lock); + + /* + * It is invalid to put an active child under a parent that is + * not active, has run-time PM enabled and the + * 'power.ignore_children' flag unset. + */ + if (!parent->power.disable_depth + && !parent->power.ignore_children + && parent->power.runtime_status != RPM_ACTIVE) { + error = -EBUSY; + } else { + if (dev->power.runtime_status == RPM_SUSPENDED) + atomic_inc(&parent->power.child_count); + } + + spin_unlock_irq(&parent->power.lock); + + if (error) + goto out; + } + + out_set: + dev->power.runtime_status = status; + dev->power.runtime_error = 0; + out: + spin_unlock_irqrestore(&dev->power.lock, flags); + + if (notify_parent) + pm_request_idle(parent); + + return error; +} +EXPORT_SYMBOL_GPL(__pm_runtime_set_status); + +/** + * __pm_runtime_barrier - Cancel pending requests and wait for completions. + * @dev: Device to handle. + * + * Flush all pending requests for the device from pm_wq and wait for all + * run-time PM operations involving the device in progress to complete. + * + * Should be called under dev->power.lock with interrupts disabled. + */ +static void __pm_runtime_barrier(struct device *dev) +{ + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + dev->power.request = RPM_REQ_NONE; + spin_unlock_irq(&dev->power.lock); + + cancel_work_sync(&dev->power.work); + + spin_lock_irq(&dev->power.lock); + dev->power.request_pending = false; + } + + if (dev->power.runtime_status == RPM_SUSPENDING + || dev->power.runtime_status == RPM_RESUMING + || dev->power.idle_notification) { + DEFINE_WAIT(wait); + + /* Suspend, wake-up or idle notification in progress. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_SUSPENDING + && dev->power.runtime_status != RPM_RESUMING + && !dev->power.idle_notification) + break; + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + } +} + +/** + * pm_runtime_barrier - Flush pending requests and wait for completions. + * @dev: Device to handle. + * + * Prevent the device from being suspended by incrementing its usage counter and + * if there's a pending resume request for the device, wake the device up. + * Next, make sure that all pending requests for the device have been flushed + * from pm_wq and wait for all run-time PM operations involving the device in + * progress to complete. + * + * Return value: + * 1, if there was a resume request pending and the device had to be woken up, + * 0, otherwise + */ +int pm_runtime_barrier(struct device *dev) +{ + int retval = 0; + + pm_runtime_get_noresume(dev); + spin_lock_irq(&dev->power.lock); + + if (dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME) { + __pm_runtime_resume(dev, false); + retval = 1; + } + + __pm_runtime_barrier(dev); + + spin_unlock_irq(&dev->power.lock); + pm_runtime_put_noidle(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_barrier); + +/** + * __pm_runtime_disable - Disable run-time PM of a device. + * @dev: Device to handle. + * @check_resume: If set, check if there's a resume request for the device. + * + * Increment power.disable_depth for the device and if was zero previously, + * cancel all pending run-time PM requests for the device and wait for all + * operations in progress to complete. The device can be either active or + * suspended after its run-time PM has been disabled. + * + * If @check_resume is set and there's a resume request pending when + * __pm_runtime_disable() is called and power.disable_depth is zero, the + * function will wake up the device before disabling its run-time PM. + */ +void __pm_runtime_disable(struct device *dev, bool check_resume) +{ + spin_lock_irq(&dev->power.lock); + + if (dev->power.disable_depth > 0) { + dev->power.disable_depth++; + goto out; + } + + /* + * Wake up the device if there's a resume request pending, because that + * means there probably is some I/O to process and disabling run-time PM + * shouldn't prevent the device from processing the I/O. + */ + if (check_resume && dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME) { + /* + * Prevent suspends and idle notifications from being carried + * out after we have woken up the device. + */ + pm_runtime_get_noresume(dev); + + __pm_runtime_resume(dev, false); + + pm_runtime_put_noidle(dev); + } + + if (!dev->power.disable_depth++) + __pm_runtime_barrier(dev); + + out: + spin_unlock_irq(&dev->power.lock); +} +EXPORT_SYMBOL_GPL(__pm_runtime_disable); + +/** + * pm_runtime_enable - Enable run-time PM of a device. + * @dev: Device to handle. + */ +void pm_runtime_enable(struct device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (dev->power.disable_depth > 0) + dev->power.disable_depth--; + else + dev_warn(dev, "Unbalanced %s!\n", __func__); + + spin_unlock_irqrestore(&dev->power.lock, flags); +} +EXPORT_SYMBOL_GPL(pm_runtime_enable); + +/** + * pm_runtime_init - Initialize run-time PM fields in given device object. + * @dev: Device object to initialize. + */ +void pm_runtime_init(struct device *dev) +{ + spin_lock_init(&dev->power.lock); + + dev->power.runtime_status = RPM_SUSPENDED; + dev->power.idle_notification = false; + + dev->power.disable_depth = 1; + atomic_set(&dev->power.usage_count, 0); + + dev->power.runtime_error = 0; + + atomic_set(&dev->power.child_count, 0); + pm_suspend_ignore_children(dev, false); + + dev->power.request_pending = false; + dev->power.request = RPM_REQ_NONE; + dev->power.deferred_resume = false; + INIT_WORK(&dev->power.work, pm_runtime_work); + + dev->power.timer_expires = 0; + setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, + (unsigned long)dev); + + init_waitqueue_head(&dev->power.wait_queue); +} + +/** + * pm_runtime_remove - Prepare for removing a device from device hierarchy. + * @dev: Device object being removed from device hierarchy. + */ +void pm_runtime_remove(struct device *dev) +{ + __pm_runtime_disable(dev, false); + + /* Change the status back to 'suspended' to match the initial status. */ + if (dev->power.runtime_status == RPM_ACTIVE) + pm_runtime_set_suspended(dev); +} diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 91b75301378..2b387c2260d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4151,7 +4151,7 @@ static void floppy_device_release(struct device *dev) { } -static int floppy_resume(struct platform_device *dev) +static int floppy_resume(struct device *dev) { int fdc; @@ -4162,10 +4162,15 @@ static int floppy_resume(struct platform_device *dev) return 0; } -static struct platform_driver floppy_driver = { +static struct dev_pm_ops floppy_pm_ops = { .resume = floppy_resume, + .restore = floppy_resume, +}; + +static struct platform_driver floppy_driver = { .driver = { .name = "floppy", + .pm = &floppy_pm_ops, }, }; diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 9a1e5fb412e..c8522e6f1ad 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1166,32 +1166,37 @@ static void at_dma_shutdown(struct platform_device *pdev) clk_disable(atdma->clk); } -static int at_dma_suspend_late(struct platform_device *pdev, pm_message_t mesg) +static int at_dma_suspend_noirq(struct device *dev) { - struct at_dma *atdma = platform_get_drvdata(pdev); + struct platform_device *pdev = to_platform_device(dev); + struct at_dma *atdma = platform_get_drvdata(pdev); at_dma_off(platform_get_drvdata(pdev)); clk_disable(atdma->clk); return 0; } -static int at_dma_resume_early(struct platform_device *pdev) +static int at_dma_resume_noirq(struct device *dev) { - struct at_dma *atdma = platform_get_drvdata(pdev); + struct platform_device *pdev = to_platform_device(dev); + struct at_dma *atdma = platform_get_drvdata(pdev); clk_enable(atdma->clk); dma_writel(atdma, EN, AT_DMA_ENABLE); return 0; - } +static struct dev_pm_ops at_dma_dev_pm_ops = { + .suspend_noirq = at_dma_suspend_noirq, + .resume_noirq = at_dma_resume_noirq, +}; + static struct platform_driver at_dma_driver = { .remove = __exit_p(at_dma_remove), .shutdown = at_dma_shutdown, - .suspend_late = at_dma_suspend_late, - .resume_early = at_dma_resume_early, .driver = { .name = "at_hdmac", + .pm = &at_dma_dev_pm_ops, }, }; diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 98c9a847bf5..933c143b6a7 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1399,8 +1399,9 @@ static void dw_shutdown(struct platform_device *pdev) clk_disable(dw->clk); } -static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg) +static int dw_suspend_noirq(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct dw_dma *dw = platform_get_drvdata(pdev); dw_dma_off(platform_get_drvdata(pdev)); @@ -1408,23 +1409,27 @@ static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg) return 0; } -static int dw_resume_early(struct platform_device *pdev) +static int dw_resume_noirq(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct dw_dma *dw = platform_get_drvdata(pdev); clk_enable(dw->clk); dma_writel(dw, CFG, DW_CFG_DMA_EN); return 0; - } +static struct dev_pm_ops dw_dev_pm_ops = { + .suspend_noirq = dw_suspend_noirq, + .resume_noirq = dw_resume_noirq, +}; + static struct platform_driver dw_driver = { .remove = __exit_p(dw_remove), .shutdown = dw_shutdown, - .suspend_late = dw_suspend_late, - .resume_early = dw_resume_early, .driver = { .name = "dw_dmac", + .pm = &dw_dev_pm_ops, }, }; diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index 88dab52926f..7837930146a 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -1291,17 +1291,18 @@ static void txx9dmac_shutdown(struct platform_device *pdev) txx9dmac_off(ddev); } -static int txx9dmac_suspend_late(struct platform_device *pdev, - pm_message_t mesg) +static int txx9dmac_suspend_noirq(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); txx9dmac_off(ddev); return 0; } -static int txx9dmac_resume_early(struct platform_device *pdev) +static int txx9dmac_resume_noirq(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct txx9dmac_dev *ddev = platform_get_drvdata(pdev); struct txx9dmac_platform_data *pdata = pdev->dev.platform_data; u32 mcr; @@ -1314,6 +1315,11 @@ static int txx9dmac_resume_early(struct platform_device *pdev) } +static struct dev_pm_ops txx9dmac_dev_pm_ops = { + .suspend_noirq = txx9dmac_suspend_noirq, + .resume_noirq = txx9dmac_resume_noirq, +}; + static struct platform_driver txx9dmac_chan_driver = { .remove = __exit_p(txx9dmac_chan_remove), .driver = { @@ -1324,10 +1330,9 @@ static struct platform_driver txx9dmac_chan_driver = { static struct platform_driver txx9dmac_driver = { .remove = __exit_p(txx9dmac_remove), .shutdown = txx9dmac_shutdown, - .suspend_late = txx9dmac_suspend_late, - .resume_early = txx9dmac_resume_early, .driver = { .name = "txx9dmac", + .pm = &txx9dmac_dev_pm_ops, }, }; diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 762e1e53088..049555777f6 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -1134,35 +1134,44 @@ static int __exit i2c_pxa_remove(struct platform_device *dev) } #ifdef CONFIG_PM -static int i2c_pxa_suspend_late(struct platform_device *dev, pm_message_t state) +static int i2c_pxa_suspend_noirq(struct device *dev) { - struct pxa_i2c *i2c = platform_get_drvdata(dev); + struct platform_device *pdev = to_platform_device(dev); + struct pxa_i2c *i2c = platform_get_drvdata(pdev); + clk_disable(i2c->clk); + return 0; } -static int i2c_pxa_resume_early(struct platform_device *dev) +static int i2c_pxa_resume_noirq(struct device *dev) { - struct pxa_i2c *i2c = platform_get_drvdata(dev); + struct platform_device *pdev = to_platform_device(dev); + struct pxa_i2c *i2c = platform_get_drvdata(pdev); clk_enable(i2c->clk); i2c_pxa_reset(i2c); return 0; } + +static struct dev_pm_ops i2c_pxa_dev_pm_ops = { + .suspend_noirq = i2c_pxa_suspend_noirq, + .resume_noirq = i2c_pxa_resume_noirq, +}; + +#define I2C_PXA_DEV_PM_OPS (&i2c_pxa_dev_pm_ops) #else -#define i2c_pxa_suspend_late NULL -#define i2c_pxa_resume_early NULL +#define I2C_PXA_DEV_PM_OPS NULL #endif static struct platform_driver i2c_pxa_driver = { .probe = i2c_pxa_probe, .remove = __exit_p(i2c_pxa_remove), - .suspend_late = i2c_pxa_suspend_late, - .resume_early = i2c_pxa_resume_early, .driver = { .name = "pxa2xx-i2c", .owner = THIS_MODULE, + .pm = I2C_PXA_DEV_PM_OPS, }, .id_table = i2c_pxa_id_table, }; diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 20bb0ceb027..96aafb91b69 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -946,17 +946,20 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -static int s3c24xx_i2c_suspend_late(struct platform_device *dev, - pm_message_t msg) +static int s3c24xx_i2c_suspend_noirq(struct device *dev) { - struct s3c24xx_i2c *i2c = platform_get_drvdata(dev); + struct platform_device *pdev = to_platform_device(dev); + struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev); + i2c->suspended = 1; + return 0; } -static int s3c24xx_i2c_resume(struct platform_device *dev) +static int s3c24xx_i2c_resume(struct device *dev) { - struct s3c24xx_i2c *i2c = platform_get_drvdata(dev); + struct platform_device *pdev = to_platform_device(dev); + struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev); i2c->suspended = 0; s3c24xx_i2c_init(i2c); @@ -964,9 +967,14 @@ static int s3c24xx_i2c_resume(struct platform_device *dev) return 0; } +static struct dev_pm_ops s3c24xx_i2c_dev_pm_ops = { + .suspend_noirq = s3c24xx_i2c_suspend_noirq, + .resume = s3c24xx_i2c_resume, +}; + +#define S3C24XX_DEV_PM_OPS (&s3c24xx_i2c_dev_pm_ops) #else -#define s3c24xx_i2c_suspend_late NULL -#define s3c24xx_i2c_resume NULL +#define S3C24XX_DEV_PM_OPS NULL #endif /* device driver for platform bus bits */ @@ -985,12 +993,11 @@ MODULE_DEVICE_TABLE(platform, s3c24xx_driver_ids); static struct platform_driver s3c24xx_i2c_driver = { .probe = s3c24xx_i2c_probe, .remove = s3c24xx_i2c_remove, - .suspend_late = s3c24xx_i2c_suspend_late, - .resume = s3c24xx_i2c_resume, .id_table = s3c24xx_driver_ids, .driver = { .owner = THIS_MODULE, .name = "s3c-i2c", + .pm = S3C24XX_DEV_PM_OPS, }, }; diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index a2ad53e1587..af04f5b049d 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -53,7 +53,7 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); static int __init hp_wmi_bios_setup(struct platform_device *device); static int __exit hp_wmi_bios_remove(struct platform_device *device); -static int hp_wmi_resume_handler(struct platform_device *device); +static int hp_wmi_resume_handler(struct device *device); struct bios_args { u32 signature; @@ -94,14 +94,19 @@ static struct rfkill *wifi_rfkill; static struct rfkill *bluetooth_rfkill; static struct rfkill *wwan_rfkill; +static struct dev_pm_ops hp_wmi_pm_ops = { + .resume = hp_wmi_resume_handler, + .restore = hp_wmi_resume_handler, +}; + static struct platform_driver hp_wmi_driver = { .driver = { - .name = "hp-wmi", - .owner = THIS_MODULE, + .name = "hp-wmi", + .owner = THIS_MODULE, + .pm = &hp_wmi_pm_ops, }, .probe = hp_wmi_bios_setup, .remove = hp_wmi_bios_remove, - .resume = hp_wmi_resume_handler, }; static int hp_wmi_perform_query(int query, int write, int value) @@ -512,7 +517,7 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device) return 0; } -static int hp_wmi_resume_handler(struct platform_device *device) +static int hp_wmi_resume_handler(struct device *device) { /* * Hardware state may have changed while suspended, so trigger diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index c7c1ca0494c..1d26beddf2c 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2167,8 +2167,9 @@ static int __devexit musb_remove(struct platform_device *pdev) #ifdef CONFIG_PM -static int musb_suspend(struct platform_device *pdev, pm_message_t message) +static int musb_suspend(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); unsigned long flags; struct musb *musb = dev_to_musb(&pdev->dev); @@ -2195,8 +2196,9 @@ static int musb_suspend(struct platform_device *pdev, pm_message_t message) return 0; } -static int musb_resume_early(struct platform_device *pdev) +static int musb_resume_noirq(struct device *dev) { + struct platform_device *pdev = to_platform_device(dev); struct musb *musb = dev_to_musb(&pdev->dev); if (!musb->clock) @@ -2214,9 +2216,14 @@ static int musb_resume_early(struct platform_device *pdev) return 0; } +static struct dev_pm_ops musb_dev_pm_ops = { + .suspend = musb_suspend, + .resume_noirq = musb_resume_noirq, +}; + +#define MUSB_DEV_PM_OPS (&musb_dev_pm_ops) #else -#define musb_suspend NULL -#define musb_resume_early NULL +#define MUSB_DEV_PM_OPS NULL #endif static struct platform_driver musb_driver = { @@ -2224,11 +2231,10 @@ static struct platform_driver musb_driver = { .name = (char *)musb_driver_name, .bus = &platform_bus_type, .owner = THIS_MODULE, + .pm = MUSB_DEV_PM_OPS, }, .remove = __devexit_p(musb_remove), .shutdown = musb_shutdown, - .suspend = musb_suspend, - .resume_early = musb_resume_early, }; /*-------------------------------------------------------------------------*/ diff --git a/include/asm-generic/device.h b/include/asm-generic/device.h index c17c9600f22..d7c76bba640 100644 --- a/include/asm-generic/device.h +++ b/include/asm-generic/device.h @@ -9,4 +9,7 @@ struct dev_archdata { }; +struct pdev_archdata { +}; + #endif /* _ASM_GENERIC_DEVICE_H */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 8dc5123b630..3c6675c2444 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -22,6 +22,9 @@ struct platform_device { struct resource * resource; struct platform_device_id *id_entry; + + /* arch specific additions */ + struct pdev_archdata archdata; }; #define platform_get_device_id(pdev) ((pdev)->id_entry) @@ -57,8 +60,6 @@ struct platform_driver { int (*remove)(struct platform_device *); void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); - int (*suspend_late)(struct platform_device *, pm_message_t state); - int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); struct device_driver driver; struct platform_device_id *id_table; diff --git a/include/linux/pm.h b/include/linux/pm.h index b3f74764a58..3b7e04b95bd 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -22,6 +22,10 @@ #define _LINUX_PM_H #include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> +#include <linux/wait.h> +#include <linux/timer.h> /* * Callbacks for platform drivers to implement. @@ -165,6 +169,28 @@ typedef struct pm_message { * It is allowed to unregister devices while the above callbacks are being * executed. However, it is not allowed to unregister a device from within any * of its own callbacks. + * + * There also are the following callbacks related to run-time power management + * of devices: + * + * @runtime_suspend: Prepare the device for a condition in which it won't be + * able to communicate with the CPU(s) and RAM due to power management. + * This need not mean that the device should be put into a low power state. + * For example, if the device is behind a link which is about to be turned + * off, the device may remain at full power. If the device does go to low + * power and if device_may_wakeup(dev) is true, remote wake-up (i.e., a + * hardware mechanism allowing the device to request a change of its power + * state, such as PCI PME) should be enabled for it. + * + * @runtime_resume: Put the device into the fully active state in response to a + * wake-up event generated by hardware or at the request of software. If + * necessary, put the device into the full power state and restore its + * registers, so that it is fully operational. + * + * @runtime_idle: Device appears to be inactive and it might be put into a low + * power state if all of the necessary conditions are satisfied. Check + * these conditions and handle the device as appropriate, possibly queueing + * a suspend request for it. The return value is ignored by the PM core. */ struct dev_pm_ops { @@ -182,8 +208,25 @@ struct dev_pm_ops { int (*thaw_noirq)(struct device *dev); int (*poweroff_noirq)(struct device *dev); int (*restore_noirq)(struct device *dev); + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + int (*runtime_idle)(struct device *dev); }; +/* + * Use this if you want to use the same suspend and resume callbacks for suspend + * to RAM and hibernation. + */ +#define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ +struct dev_pm_ops name = { \ + .suspend = suspend_fn, \ + .resume = resume_fn, \ + .freeze = suspend_fn, \ + .thaw = resume_fn, \ + .poweroff = suspend_fn, \ + .restore = resume_fn, \ +} + /** * PM_EVENT_ messages * @@ -315,14 +358,80 @@ enum dpm_state { DPM_OFF_IRQ, }; +/** + * Device run-time power management status. + * + * These status labels are used internally by the PM core to indicate the + * current status of a device with respect to the PM core operations. They do + * not reflect the actual power state of the device or its status as seen by the + * driver. + * + * RPM_ACTIVE Device is fully operational. Indicates that the device + * bus type's ->runtime_resume() callback has completed + * successfully. + * + * RPM_SUSPENDED Device bus type's ->runtime_suspend() callback has + * completed successfully. The device is regarded as + * suspended. + * + * RPM_RESUMING Device bus type's ->runtime_resume() callback is being + * executed. + * + * RPM_SUSPENDING Device bus type's ->runtime_suspend() callback is being + * executed. + */ + +enum rpm_status { + RPM_ACTIVE = 0, + RPM_RESUMING, + RPM_SUSPENDED, + RPM_SUSPENDING, +}; + +/** + * Device run-time power management request types. + * + * RPM_REQ_NONE Do nothing. + * + * RPM_REQ_IDLE Run the device bus type's ->runtime_idle() callback + * + * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback + * + * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback + */ + +enum rpm_request { + RPM_REQ_NONE = 0, + RPM_REQ_IDLE, + RPM_REQ_SUSPEND, + RPM_REQ_RESUME, +}; + struct dev_pm_info { pm_message_t power_state; - unsigned can_wakeup:1; - unsigned should_wakeup:1; + unsigned int can_wakeup:1; + unsigned int should_wakeup:1; enum dpm_state status; /* Owned by the PM core */ -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP struct list_head entry; #endif +#ifdef CONFIG_PM_RUNTIME + struct timer_list suspend_timer; + unsigned long timer_expires; + struct work_struct work; + wait_queue_head_t wait_queue; + spinlock_t lock; + atomic_t usage_count; + atomic_t child_count; + unsigned int disable_depth:3; + unsigned int ignore_children:1; + unsigned int idle_notification:1; + unsigned int request_pending:1; + unsigned int deferred_resume:1; + enum rpm_request request; + enum rpm_status runtime_status; + int runtime_error; +#endif }; /* diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h new file mode 100644 index 00000000000..44087044910 --- /dev/null +++ b/include/linux/pm_runtime.h @@ -0,0 +1,114 @@ +/* + * pm_runtime.h - Device run-time power management helper functions. + * + * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl> + * + * This file is released under the GPLv2. + */ + +#ifndef _LINUX_PM_RUNTIME_H +#define _LINUX_PM_RUNTIME_H + +#include <linux/device.h> +#include <linux/pm.h> + +#ifdef CONFIG_PM_RUNTIME + +extern struct workqueue_struct *pm_wq; + +extern int pm_runtime_idle(struct device *dev); +extern int pm_runtime_suspend(struct device *dev); +extern int pm_runtime_resume(struct device *dev); +extern int pm_request_idle(struct device *dev); +extern int pm_schedule_suspend(struct device *dev, unsigned int delay); +extern int pm_request_resume(struct device *dev); +extern int __pm_runtime_get(struct device *dev, bool sync); +extern int __pm_runtime_put(struct device *dev, bool sync); +extern int __pm_runtime_set_status(struct device *dev, unsigned int status); +extern int pm_runtime_barrier(struct device *dev); +extern void pm_runtime_enable(struct device *dev); +extern void __pm_runtime_disable(struct device *dev, bool check_resume); + +static inline bool pm_children_suspended(struct device *dev) +{ + return dev->power.ignore_children + || !atomic_read(&dev->power.child_count); +} + +static inline void pm_suspend_ignore_children(struct device *dev, bool enable) +{ + dev->power.ignore_children = enable; +} + +static inline void pm_runtime_get_noresume(struct device *dev) +{ + atomic_inc(&dev->power.usage_count); +} + +static inline void pm_runtime_put_noidle(struct device *dev) +{ + atomic_add_unless(&dev->power.usage_count, -1, 0); +} + +#else /* !CONFIG_PM_RUNTIME */ + +static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } +static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; } +static inline int pm_runtime_resume(struct device *dev) { return 0; } +static inline int pm_request_idle(struct device *dev) { return -ENOSYS; } +static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) +{ + return -ENOSYS; +} +static inline int pm_request_resume(struct device *dev) { return 0; } +static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; } +static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; } +static inline int __pm_runtime_set_status(struct device *dev, + unsigned int status) { return 0; } +static inline int pm_runtime_barrier(struct device *dev) { return 0; } +static inline void pm_runtime_enable(struct device *dev) {} +static inline void __pm_runtime_disable(struct device *dev, bool c) {} + +static inline bool pm_children_suspended(struct device *dev) { return false; } +static inline void pm_suspend_ignore_children(struct device *dev, bool en) {} +static inline void pm_runtime_get_noresume(struct device *dev) {} +static inline void pm_runtime_put_noidle(struct device *dev) {} + +#endif /* !CONFIG_PM_RUNTIME */ + +static inline int pm_runtime_get(struct device *dev) +{ + return __pm_runtime_get(dev, false); +} + +static inline int pm_runtime_get_sync(struct device *dev) +{ + return __pm_runtime_get(dev, true); +} + +static inline int pm_runtime_put(struct device *dev) +{ + return __pm_runtime_put(dev, false); +} + +static inline int pm_runtime_put_sync(struct device *dev) +{ + return __pm_runtime_put(dev, true); +} + +static inline int pm_runtime_set_active(struct device *dev) +{ + return __pm_runtime_set_status(dev, RPM_ACTIVE); +} + +static inline void pm_runtime_set_suspended(struct device *dev) +{ + __pm_runtime_set_status(dev, RPM_SUSPENDED); +} + +static inline void pm_runtime_disable(struct device *dev) +{ + __pm_runtime_disable(dev, true); +} + +#endif diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 72067cbdb37..91e09d3b2eb 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -208,3 +208,17 @@ config APM_EMULATION random kernel OOPSes or reboots that don't seem to be related to anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). + +config PM_RUNTIME + bool "Run-time PM core functionality" + depends on PM + ---help--- + Enable functionality allowing I/O devices to be put into energy-saving + (low power) states at run time (or autosuspended) after a specified + period of inactivity and woken up in response to a hardware-generated + wake-up event or a driver's request. + + Hardware support is generally required for this functionality to work + and the bus type drivers of the buses the devices are on are + responsible for the actual handling of the autosuspend requests and + wake-up events. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 81d2e746489..04b3a83d686 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -298,8 +298,8 @@ int hibernation_snapshot(int platform_mode) if (error) return error; - /* Free memory before shutting down devices. */ - error = swsusp_shrink_memory(); + /* Preallocate image memory before shutting down devices. */ + error = hibernate_preallocate_memory(); if (error) goto Close; @@ -315,6 +315,10 @@ int hibernation_snapshot(int platform_mode) /* Control returns here after successful restore */ Resume_devices: + /* We may need to release the preallocated image pages here. */ + if (error || !in_suspend) + swsusp_free(); + dpm_resume_end(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); resume_console(); @@ -460,11 +464,11 @@ int hibernation_platform_enter(void) error = hibernation_ops->prepare(); if (error) - goto Platofrm_finish; + goto Platform_finish; error = disable_nonboot_cpus(); if (error) - goto Platofrm_finish; + goto Platform_finish; local_irq_disable(); sysdev_suspend(PMSG_HIBERNATE); @@ -476,7 +480,7 @@ int hibernation_platform_enter(void) * We don't need to reenable the nonboot CPUs or resume consoles, since * the system is going to be halted anyway. */ - Platofrm_finish: + Platform_finish: hibernation_ops->finish(); dpm_suspend_noirq(PMSG_RESTORE); @@ -578,7 +582,10 @@ int hibernate(void) goto Thaw; error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); - if (in_suspend && !error) { + if (error) + goto Thaw; + + if (in_suspend) { unsigned int flags = 0; if (hibernation_mode == HIBERNATION_PLATFORM) @@ -590,8 +597,8 @@ int hibernate(void) power_down(); } else { pr_debug("PM: Image restored successfully.\n"); - swsusp_free(); } + Thaw: thaw_processes(); Finish: diff --git a/kernel/power/main.c b/kernel/power/main.c index f710e36930c..347d2cc88cd 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -11,6 +11,7 @@ #include <linux/kobject.h> #include <linux/string.h> #include <linux/resume-trace.h> +#include <linux/workqueue.h> #include "power.h" @@ -217,8 +218,24 @@ static struct attribute_group attr_group = { .attrs = g, }; +#ifdef CONFIG_PM_RUNTIME +struct workqueue_struct *pm_wq; + +static int __init pm_start_workqueue(void) +{ + pm_wq = create_freezeable_workqueue("pm"); + + return pm_wq ? 0 : -ENOMEM; +} +#else +static inline int pm_start_workqueue(void) { return 0; } +#endif + static int __init pm_init(void) { + int error = pm_start_workqueue(); + if (error) + return error; power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; diff --git a/kernel/power/power.h b/kernel/power/power.h index 26d5a26f82e..46c5a26630a 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void); extern int create_basic_memory_bitmaps(void); extern void free_basic_memory_bitmaps(void); -extern int swsusp_shrink_memory(void); +extern int hibernate_preallocate_memory(void); /** * Auxiliary structure used for reading the snapshot image data and diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 523a451b45d..97955b0e44f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -233,7 +233,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) #define BM_END_OF_MAP (~0UL) -#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) +#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) struct bm_block { struct list_head hook; /* hook into a list of bitmap blocks */ @@ -275,7 +275,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); /** * create_bm_block_list - create a list of block bitmap objects - * @nr_blocks - number of blocks to allocate + * @pages - number of pages to track * @list - list to put the allocated blocks into * @ca - chain allocator to be used for allocating memory */ @@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(void) struct zone *zone; unsigned int n = 0; - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long pfn, max_zone_pfn; if (!is_highmem(zone)) @@ -916,7 +916,7 @@ static unsigned int count_data_pages(void) unsigned long pfn, max_zone_pfn; unsigned int n = 0; - for_each_zone(zone) { + for_each_populated_zone(zone) { if (is_highmem(zone)) continue; @@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) struct zone *zone; unsigned long pfn; - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long max_zone_pfn; mark_free_pages(zone); @@ -1033,6 +1033,25 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) static unsigned int nr_copy_pages; /* Number of pages needed for saving the original pfns of the image pages */ static unsigned int nr_meta_pages; +/* + * Numbers of normal and highmem page frames allocated for hibernation image + * before suspending devices. + */ +unsigned int alloc_normal, alloc_highmem; +/* + * Memory bitmap used for marking saveable pages (during hibernation) or + * hibernation image pages (during restore) + */ +static struct memory_bitmap orig_bm; +/* + * Memory bitmap used during hibernation for marking allocated page frames that + * will contain copies of saveable pages. During restore it is initially used + * for marking hibernation image pages, but then the set bits from it are + * duplicated in @orig_bm and it is released. On highmem systems it is next + * used for marking "safe" highmem pages, but it has to be reinitialized for + * this purpose. + */ +static struct memory_bitmap copy_bm; /** * swsusp_free - free pages allocated for the suspend. @@ -1046,7 +1065,7 @@ void swsusp_free(void) struct zone *zone; unsigned long pfn, max_zone_pfn; - for_each_zone(zone) { + for_each_populated_zone(zone) { max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) { @@ -1064,74 +1083,286 @@ void swsusp_free(void) nr_meta_pages = 0; restore_pblist = NULL; buffer = NULL; + alloc_normal = 0; + alloc_highmem = 0; } +/* Helper functions used for the shrinking of memory. */ + +#define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) + /** - * swsusp_shrink_memory - Try to free as much memory as needed - * - * ... but do not OOM-kill anyone + * preallocate_image_pages - Allocate a number of pages for hibernation image + * @nr_pages: Number of page frames to allocate. + * @mask: GFP flags to use for the allocation. * - * Notice: all userland should be stopped before it is called, or - * livelock is possible. + * Return value: Number of page frames actually allocated + */ +static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) +{ + unsigned long nr_alloc = 0; + + while (nr_pages > 0) { + struct page *page; + + page = alloc_image_page(mask); + if (!page) + break; + memory_bm_set_bit(©_bm, page_to_pfn(page)); + if (PageHighMem(page)) + alloc_highmem++; + else + alloc_normal++; + nr_pages--; + nr_alloc++; + } + + return nr_alloc; +} + +static unsigned long preallocate_image_memory(unsigned long nr_pages) +{ + return preallocate_image_pages(nr_pages, GFP_IMAGE); +} + +#ifdef CONFIG_HIGHMEM +static unsigned long preallocate_image_highmem(unsigned long nr_pages) +{ + return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); +} + +/** + * __fraction - Compute (an approximation of) x * (multiplier / base) */ +static unsigned long __fraction(u64 x, u64 multiplier, u64 base) +{ + x *= multiplier; + do_div(x, base); + return (unsigned long)x; +} + +static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, + unsigned long highmem, + unsigned long total) +{ + unsigned long alloc = __fraction(nr_pages, highmem, total); -#define SHRINK_BITE 10000 -static inline unsigned long __shrink_memory(long tmp) + return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); +} +#else /* CONFIG_HIGHMEM */ +static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) { - if (tmp > SHRINK_BITE) - tmp = SHRINK_BITE; - return shrink_all_memory(tmp); + return 0; } -int swsusp_shrink_memory(void) +static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, + unsigned long highmem, + unsigned long total) +{ + return 0; +} +#endif /* CONFIG_HIGHMEM */ + +/** + * free_unnecessary_pages - Release preallocated pages not needed for the image + */ +static void free_unnecessary_pages(void) +{ + unsigned long save_highmem, to_free_normal, to_free_highmem; + + to_free_normal = alloc_normal - count_data_pages(); + save_highmem = count_highmem_pages(); + if (alloc_highmem > save_highmem) { + to_free_highmem = alloc_highmem - save_highmem; + } else { + to_free_highmem = 0; + to_free_normal -= save_highmem - alloc_highmem; + } + + memory_bm_position_reset(©_bm); + + while (to_free_normal > 0 && to_free_highmem > 0) { + unsigned long pfn = memory_bm_next_pfn(©_bm); + struct page *page = pfn_to_page(pfn); + + if (PageHighMem(page)) { + if (!to_free_highmem) + continue; + to_free_highmem--; + alloc_highmem--; + } else { + if (!to_free_normal) + continue; + to_free_normal--; + alloc_normal--; + } + memory_bm_clear_bit(©_bm, pfn); + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); + __free_page(page); + } +} + +/** + * minimum_image_size - Estimate the minimum acceptable size of an image + * @saveable: Number of saveable pages in the system. + * + * We want to avoid attempting to free too much memory too hard, so estimate the + * minimum acceptable size of a hibernation image to use as the lower limit for + * preallocating memory. + * + * We assume that the minimum image size should be proportional to + * + * [number of saveable pages] - [number of pages that can be freed in theory] + * + * where the second term is the sum of (1) reclaimable slab pages, (2) active + * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages, + * minus mapped file pages. + */ +static unsigned long minimum_image_size(unsigned long saveable) +{ + unsigned long size; + + size = global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_ACTIVE_ANON) + + global_page_state(NR_INACTIVE_ANON) + + global_page_state(NR_ACTIVE_FILE) + + global_page_state(NR_INACTIVE_FILE) + - global_page_state(NR_FILE_MAPPED); + + return saveable <= size ? 0 : saveable - size; +} + +/** + * hibernate_preallocate_memory - Preallocate memory for hibernation image + * + * To create a hibernation image it is necessary to make a copy of every page + * frame in use. We also need a number of page frames to be free during + * hibernation for allocations made while saving the image and for device + * drivers, in case they need to allocate memory from their hibernation + * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES, + * respectively, both of which are rough estimates). To make this happen, we + * compute the total number of available page frames and allocate at least + * + * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES + * + * of them, which corresponds to the maximum size of a hibernation image. + * + * If image_size is set below the number following from the above formula, + * the preallocation of memory is continued until the total number of saveable + * pages in the system is below the requested image size or the minimum + * acceptable image size returned by minimum_image_size(), whichever is greater. + */ +int hibernate_preallocate_memory(void) { - long tmp; struct zone *zone; - unsigned long pages = 0; - unsigned int i = 0; - char *p = "-\\|/"; + unsigned long saveable, size, max_size, count, highmem, pages = 0; + unsigned long alloc, save_highmem, pages_highmem; struct timeval start, stop; + int error; - printk(KERN_INFO "PM: Shrinking memory... "); + printk(KERN_INFO "PM: Preallocating image memory... "); do_gettimeofday(&start); - do { - long size, highmem_size; - - highmem_size = count_highmem_pages(); - size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; - tmp = size; - size += highmem_size; - for_each_populated_zone(zone) { - tmp += snapshot_additional_pages(zone); - if (is_highmem(zone)) { - highmem_size -= - zone_page_state(zone, NR_FREE_PAGES); - } else { - tmp -= zone_page_state(zone, NR_FREE_PAGES); - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - } - } - if (highmem_size < 0) - highmem_size = 0; + error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); + if (error) + goto err_out; - tmp += highmem_size; - if (tmp > 0) { - tmp = __shrink_memory(tmp); - if (!tmp) - return -ENOMEM; - pages += tmp; - } else if (size > image_size / PAGE_SIZE) { - tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); - pages += tmp; - } - printk("\b%c", p[i++%4]); - } while (tmp > 0); + error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); + if (error) + goto err_out; + + alloc_normal = 0; + alloc_highmem = 0; + + /* Count the number of saveable data pages. */ + save_highmem = count_highmem_pages(); + saveable = count_data_pages(); + + /* + * Compute the total number of page frames we can use (count) and the + * number of pages needed for image metadata (size). + */ + count = saveable; + saveable += save_highmem; + highmem = save_highmem; + size = 0; + for_each_populated_zone(zone) { + size += snapshot_additional_pages(zone); + if (is_highmem(zone)) + highmem += zone_page_state(zone, NR_FREE_PAGES); + else + count += zone_page_state(zone, NR_FREE_PAGES); + } + count += highmem; + count -= totalreserve_pages; + + /* Compute the maximum number of saveable pages to leave in memory. */ + max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; + size = DIV_ROUND_UP(image_size, PAGE_SIZE); + if (size > max_size) + size = max_size; + /* + * If the maximum is not less than the current number of saveable pages + * in memory, allocate page frames for the image and we're done. + */ + if (size >= saveable) { + pages = preallocate_image_highmem(save_highmem); + pages += preallocate_image_memory(saveable - pages); + goto out; + } + + /* Estimate the minimum size of the image. */ + pages = minimum_image_size(saveable); + if (size < pages) + size = min_t(unsigned long, pages, max_size); + + /* + * Let the memory management subsystem know that we're going to need a + * large number of page frames to allocate and make it free some memory. + * NOTE: If this is not done, performance will be hurt badly in some + * test cases. + */ + shrink_all_memory(saveable - size); + + /* + * The number of saveable pages in memory was too high, so apply some + * pressure to decrease it. First, make room for the largest possible + * image and fail if that doesn't work. Next, try to decrease the size + * of the image as much as indicated by 'size' using allocations from + * highmem and non-highmem zones separately. + */ + pages_highmem = preallocate_image_highmem(highmem / 2); + alloc = (count - max_size) - pages_highmem; + pages = preallocate_image_memory(alloc); + if (pages < alloc) + goto err_out; + size = max_size - size; + alloc = size; + size = preallocate_highmem_fraction(size, highmem, count); + pages_highmem += size; + alloc -= size; + pages += preallocate_image_memory(alloc); + pages += pages_highmem; + + /* + * We only need as many page frames for the image as there are saveable + * pages in memory, but we have allocated more. Release the excessive + * ones now. + */ + free_unnecessary_pages(); + + out: do_gettimeofday(&stop); - printk("\bdone (%lu pages freed)\n", pages); - swsusp_show_speed(&start, &stop, pages, "Freed"); + printk(KERN_CONT "done (allocated %lu pages)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Allocated"); return 0; + + err_out: + printk(KERN_CONT "\n"); + swsusp_free(); + return -ENOMEM; } #ifdef CONFIG_HIGHMEM @@ -1142,7 +1373,7 @@ int swsusp_shrink_memory(void) static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { - unsigned int free_highmem = count_free_highmem_pages(); + unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; if (free_highmem >= nr_highmem) nr_highmem = 0; @@ -1164,19 +1395,17 @@ count_pages_for_highmem(unsigned int nr_highmem) { return 0; } static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) { struct zone *zone; - unsigned int free = 0, meta = 0; + unsigned int free = alloc_normal; - for_each_zone(zone) { - meta += snapshot_additional_pages(zone); + for_each_populated_zone(zone) if (!is_highmem(zone)) free += zone_page_state(zone, NR_FREE_PAGES); - } nr_pages += count_pages_for_highmem(nr_highmem); - pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n", - nr_pages, PAGES_FOR_IO, meta, free); + pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", + nr_pages, PAGES_FOR_IO, free); - return free > nr_pages + PAGES_FOR_IO + meta; + return free > nr_pages + PAGES_FOR_IO; } #ifdef CONFIG_HIGHMEM @@ -1198,7 +1427,7 @@ static inline int get_highmem_buffer(int safe_needed) */ static inline unsigned int -alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) { unsigned int to_alloc = count_free_highmem_pages(); @@ -1218,7 +1447,7 @@ alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) static inline int get_highmem_buffer(int safe_needed) { return 0; } static inline unsigned int -alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } #endif /* CONFIG_HIGHMEM */ /** @@ -1237,51 +1466,36 @@ static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) { - int error; - - error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); - if (error) - goto Free; - - error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); - if (error) - goto Free; + int error = 0; if (nr_highmem > 0) { error = get_highmem_buffer(PG_ANY); if (error) - goto Free; - - nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); + goto err_out; + if (nr_highmem > alloc_highmem) { + nr_highmem -= alloc_highmem; + nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); + } } - while (nr_pages-- > 0) { - struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); - - if (!page) - goto Free; + if (nr_pages > alloc_normal) { + nr_pages -= alloc_normal; + while (nr_pages-- > 0) { + struct page *page; - memory_bm_set_bit(copy_bm, page_to_pfn(page)); + page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); + if (!page) + goto err_out; + memory_bm_set_bit(copy_bm, page_to_pfn(page)); + } } + return 0; - Free: + err_out: swsusp_free(); - return -ENOMEM; + return error; } -/* Memory bitmap used for marking saveable pages (during suspend) or the - * suspend image pages (during resume) - */ -static struct memory_bitmap orig_bm; -/* Memory bitmap used on suspend for marking allocated pages that will contain - * the copies of saveable pages. During resume it is initially used for - * marking the suspend image pages, but then its set bits are duplicated in - * @orig_bm and it is released. Next, on systems with high memory, it may be - * used for marking "safe" highmem pages, but it has to be reinitialized for - * this purpose. - */ -static struct memory_bitmap copy_bm; - asmlinkage int swsusp_save(void) { unsigned int nr_pages, nr_highmem; @@ -1474,7 +1688,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) unsigned long pfn, max_zone_pfn; /* Clear page flags */ - for_each_zone(zone) { + for_each_populated_zone(zone) { max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) |