summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-07-02 19:53:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-07-02 19:53:30 -0700
commitf317ff9eed763e99bd226a447f93d42509434f43 (patch)
treed77edfcdb1e7c32c30327f50dce04bcff8ec1531
parent13cc56013842a847a0f6ff805d9ed9181e753ef8 (diff)
parenta85f1a41f020bc2c97611060bcfae6f48a1db28d (diff)
Merge branch 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue changes from Tejun Heo: "Surprisingly, Lai and I didn't break too many things implementing custom pools and stuff last time around and there aren't any follow-up changes necessary at this point. The only change in this pull request is Viresh's patches to make some per-cpu workqueues to behave as unbound workqueues dependent on a boot param whose default can be configured via a config option. This leads to higher processing overhead / lower bandwidth as more work items are bounced across CPUs; however, it can lead to noticeable powersave in certain configurations - ~10% w/ idlish constant workload on a big.LITTLE configuration according to Viresh. This is because per-cpu workqueues interfere with how the scheduler perceives whether or not each CPU is idle by forcing pinned tasks on them, which makes the scheduler's power-aware scheduling decisions less effective. Its effectiveness is likely less pronounced on homogenous configurations and this type of optimization can probably be made automatic; however, the changes are pretty minimal and the affected workqueues are clearly marked, so it's an easy gain for some configurations for the time being with pretty unintrusive changes." * 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: fbcon: queue work on power efficient wq block: queue work on power efficient wq PHYLIB: queue work on system_power_efficient_wq workqueue: Add system wide power_efficient workqueues workqueues: Introduce new flag WQ_POWER_EFFICIENT for power oriented workqueues
-rw-r--r--Documentation/kernel-parameters.txt15
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-ioc.c3
-rw-r--r--block/genhd.c12
-rw-r--r--drivers/net/phy/phy.c9
-rw-r--r--drivers/video/console/fbcon.c2
-rw-r--r--include/linux/workqueue.h35
-rw-r--r--kernel/power/Kconfig20
-rw-r--r--kernel/workqueue.c26
9 files changed, 113 insertions, 12 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2fe6e767b3d..81732b8abd3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3341,6 +3341,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
that this also can be controlled per-workqueue for
workqueues visible under /sys/bus/workqueue/.
+ workqueue.power_efficient
+ Per-cpu workqueues are generally preferred because
+ they show better performance thanks to cache
+ locality; unfortunately, per-cpu workqueues tend to
+ be more power hungry than unbound workqueues.
+
+ Enabling this makes the per-cpu workqueues which
+ were observed to contribute significantly to power
+ consumption unbound, leading to measurably lower
+ power usage at the cost of small performance
+ overhead.
+
+ The default value of this parameter is determined by
+ the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
diff --git a/block/blk-core.c b/block/blk-core.c
index d5745b5833c..0852e5d4343 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3180,7 +3180,8 @@ int __init blk_dev_init(void)
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
- WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI |
+ WQ_POWER_EFFICIENT, 0);
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 9c4bb8266bc..4464c823cff 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -144,7 +144,8 @@ void put_io_context(struct io_context *ioc)
if (atomic_long_dec_and_test(&ioc->refcount)) {
spin_lock_irqsave(&ioc->lock, flags);
if (!hlist_empty(&ioc->icq_list))
- schedule_work(&ioc->release_work);
+ queue_work(system_power_efficient_wq,
+ &ioc->release_work);
else
free_ioc = true;
spin_unlock_irqrestore(&ioc->lock, flags);
diff --git a/block/genhd.c b/block/genhd.c
index 20625eed551..e9094b375c0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1489,9 +1489,11 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
intv = disk_events_poll_jiffies(disk);
set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
- queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, 0);
else if (intv)
- queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, intv);
out_unlock:
spin_unlock_irqrestore(&ev->lock, flags);
}
@@ -1534,7 +1536,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
spin_lock_irq(&ev->lock);
ev->clearing |= mask;
if (!ev->block)
- mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
+ mod_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, 0);
spin_unlock_irq(&ev->lock);
}
@@ -1627,7 +1630,8 @@ static void disk_check_events(struct disk_events *ev,
intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv)
- queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, intv);
spin_unlock_irq(&ev->lock);
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 38f0b312ff8..663d2d0448b 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev,
{
phydev->adjust_state = handler;
- schedule_delayed_work(&phydev->state_queue, HZ);
+ queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
}
/**
@@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
disable_irq_nosync(irq);
atomic_inc(&phydev->irq_disable);
- schedule_work(&phydev->phy_queue);
+ queue_work(system_power_efficient_wq, &phydev->phy_queue);
return IRQ_HANDLED;
}
@@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work)
/* reschedule state queue work to run as soon as possible */
cancel_delayed_work_sync(&phydev->state_queue);
- schedule_delayed_work(&phydev->state_queue, 0);
+ queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0);
return;
@@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work)
if (err < 0)
phy_error(phydev);
- schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ);
+ queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+ PHY_STATE_TIME * HZ);
}
static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index d55b3375746..cd8a8027f8a 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr)
struct fb_info *info = (struct fb_info *) dev_addr;
struct fbcon_ops *ops = info->fbcon_par;
- schedule_work(&info->queue);
+ queue_work(system_power_efficient_wq, &info->queue);
mod_timer(&ops->cursor_timer, jiffies + HZ/5);
}
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 623488fdc1f..a9f4119c7e2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -303,6 +303,33 @@ enum {
WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */
WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
+ /*
+ * Per-cpu workqueues are generally preferred because they tend to
+ * show better performance thanks to cache locality. Per-cpu
+ * workqueues exclude the scheduler from choosing the CPU to
+ * execute the worker threads, which has an unfortunate side effect
+ * of increasing power consumption.
+ *
+ * The scheduler considers a CPU idle if it doesn't have any task
+ * to execute and tries to keep idle cores idle to conserve power;
+ * however, for example, a per-cpu work item scheduled from an
+ * interrupt handler on an idle CPU will force the scheduler to
+ * excute the work item on that CPU breaking the idleness, which in
+ * turn may lead to more scheduling choices which are sub-optimal
+ * in terms of power consumption.
+ *
+ * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
+ * but become unbound if workqueue.power_efficient kernel param is
+ * specified. Per-cpu workqueues which are identified to
+ * contribute significantly to power-consumption are identified and
+ * marked with this flag and enabling the power_efficient mode
+ * leads to noticeable power saving at the cost of small
+ * performance disadvantage.
+ *
+ * http://thread.gmane.org/gmane.linux.kernel/1480396
+ */
+ WQ_POWER_EFFICIENT = 1 << 7,
+
__WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */
__WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */
@@ -333,11 +360,19 @@ enum {
*
* system_freezable_wq is equivalent to system_wq except that it's
* freezable.
+ *
+ * *_power_efficient_wq are inclined towards saving power and converted
+ * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
+ * they are same as their non-power-efficient counterparts - e.g.
+ * system_power_efficient_wq is identical to system_wq if
+ * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
*/
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *system_freezable_wq;
+extern struct workqueue_struct *system_power_efficient_wq;
+extern struct workqueue_struct *system_freezable_power_efficient_wq;
static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
{
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 9c39de095ba..d444c4e834f 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -262,6 +262,26 @@ config PM_GENERIC_DOMAINS
bool
depends on PM
+config WQ_POWER_EFFICIENT_DEFAULT
+ bool "Enable workqueue power-efficient mode by default"
+ depends on PM
+ default n
+ help
+ Per-cpu workqueues are generally preferred because they show
+ better performance thanks to cache locality; unfortunately,
+ per-cpu workqueues tend to be more power hungry than unbound
+ workqueues.
+
+ Enabling workqueue.power_efficient kernel parameter makes the
+ per-cpu workqueues which were observed to contribute
+ significantly to power consumption unbound, leading to measurably
+ lower power usage at the cost of small performance overhead.
+
+ This config option determines whether workqueue.power_efficient
+ is enabled by default.
+
+ If in doubt, say N.
+
config PM_GENERIC_DOMAINS_SLEEP
def_bool y
depends on PM_SLEEP && PM_GENERIC_DOMAINS
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee8e29a2320..f02c4a4a0c3 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
+/* see the comment above the definition of WQ_POWER_EFFICIENT */
+#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
+static bool wq_power_efficient = true;
+#else
+static bool wq_power_efficient;
+#endif
+
+module_param_named(power_efficient, wq_power_efficient, bool, 0444);
+
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -305,6 +314,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_power_efficient_wq);
+struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
@@ -4086,6 +4099,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
+ /* see the comment above the definition of WQ_POWER_EFFICIENT */
+ if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
+ flags |= WQ_UNBOUND;
+
/* allocate wq and format name */
if (flags & WQ_UNBOUND)
tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
@@ -4985,8 +5002,15 @@ static int __init init_workqueues(void)
WQ_UNBOUND_MAX_ACTIVE);
system_freezable_wq = alloc_workqueue("events_freezable",
WQ_FREEZABLE, 0);
+ system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+ WQ_POWER_EFFICIENT, 0);
+ system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+ WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+ 0);
BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
- !system_unbound_wq || !system_freezable_wq);
+ !system_unbound_wq || !system_freezable_wq ||
+ !system_power_efficient_wq ||
+ !system_freezable_power_efficient_wq);
return 0;
}
early_initcall(init_workqueues);