-rw-r--r--  drivers/base/power/clock_ops.c |  85
-rw-r--r--  drivers/base/power/opp.c       | 196
-rw-r--r--  drivers/powercap/Kconfig       |   2
-rw-r--r--  drivers/powercap/intel_rapl.c  | 264
-rw-r--r--  include/linux/pm_clock.h       |   8
-rw-r--r--  include/linux/pm_opp.h         |  12
-rw-r--r--  kernel/power/Kconfig           |   1
7 files changed, 405 insertions(+), 163 deletions(-)
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 78369305e06..b32b5d47b3c 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -12,6 +12,7 @@
 #include <linux/pm.h>
 #include <linux/pm_clock.h>
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 
@@ -34,14 +35,20 @@ struct pm_clock_entry {
 /**
  * pm_clk_enable - Enable a clock, reporting any errors
  * @dev: The device for the given clock
- * @clk: The clock being enabled.
+ * @ce: PM clock entry corresponding to the clock.
  */
-static inline int __pm_clk_enable(struct device *dev, struct clk *clk)
+static inline int __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce)
 {
-	int ret = clk_enable(clk);
-	if (ret)
-		dev_err(dev, "%s: failed to enable clk %p, error %d\n",
-			__func__, clk, ret);
+	int ret;
+
+	if (ce->status < PCE_STATUS_ERROR) {
+		ret = clk_enable(ce->clk);
+		if (!ret)
+			ce->status = PCE_STATUS_ENABLED;
+		else
+			dev_err(dev, "%s: failed to enable clk %p, error %d\n",
+				__func__, ce->clk, ret);
+	}
 
 	return ret;
 }
@@ -53,7 +60,8 @@ static inline int __pm_clk_enable(struct device *dev, struct clk *clk)
  */
 static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
 {
-	ce->clk = clk_get(dev, ce->con_id);
+	if (!ce->clk)
+		ce->clk = clk_get(dev, ce->con_id);
 	if (IS_ERR(ce->clk)) {
 		ce->status = PCE_STATUS_ERROR;
 	} else {
@@ -63,15 +71,8 @@ static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce)
 	}
 }
 
-/**
- * pm_clk_add - Start using a device clock for power management.
- * @dev: Device whose clock is going to be used for power management.
- * @con_id: Connection ID of the clock.
- *
- * Add the clock represented by @con_id to the list of clocks used for
- * the power management of @dev.
- */
-int pm_clk_add(struct device *dev, const char *con_id)
+static int __pm_clk_add(struct device *dev, const char *con_id,
+			struct clk *clk)
 {
 	struct pm_subsys_data *psd = dev_to_psd(dev);
 	struct pm_clock_entry *ce;
@@ -93,6 +94,12 @@ int pm_clk_add(struct device *dev, const char *con_id)
 			kfree(ce);
 			return -ENOMEM;
 		}
+	} else {
+		if (IS_ERR(ce->clk) || !__clk_get(clk)) {
+			kfree(ce);
+			return -ENOENT;
+		}
+		ce->clk = clk;
 	}
 
 	pm_clk_acquire(dev, ce);
@@ -104,6 +111,32 @@ int pm_clk_add(struct device *dev, const char *con_id)
 }
 
 /**
+ * pm_clk_add - Start using a device clock for power management.
+ * @dev: Device whose clock is going to be used for power management.
+ * @con_id: Connection ID of the clock.
+ *
+ * Add the clock represented by @con_id to the list of clocks used for
+ * the power management of @dev.
+ */
+int pm_clk_add(struct device *dev, const char *con_id)
+{
+	return __pm_clk_add(dev, con_id, NULL);
+}
+
+/**
+ * pm_clk_add_clk - Start using a device clock for power management.
+ * @dev: Device whose clock is going to be used for power management.
+ * @clk: Clock pointer
+ *
+ * Add the clock to the list of clocks used for the power management of @dev.
+ * It will increment refcount on clock pointer, use clk_put() on it when done.
+ */
+int pm_clk_add_clk(struct device *dev, struct clk *clk)
+{
+	return __pm_clk_add(dev, NULL, clk);
+}
+
+/**
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
  */
@@ -266,7 +299,6 @@ int pm_clk_resume(struct device *dev)
 	struct pm_subsys_data *psd = dev_to_psd(dev);
 	struct pm_clock_entry *ce;
 	unsigned long flags;
-	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -275,13 +307,8 @@ int pm_clk_resume(struct device *dev)
 
 	spin_lock_irqsave(&psd->lock, flags);
 
-	list_for_each_entry(ce, &psd->clock_list, node) {
-		if (ce->status < PCE_STATUS_ERROR) {
-			ret = __pm_clk_enable(dev, ce->clk);
-			if (!ret)
-				ce->status = PCE_STATUS_ENABLED;
-		}
-	}
+	list_for_each_entry(ce, &psd->clock_list, node)
+		__pm_clk_enable(dev, ce);
 
 	spin_unlock_irqrestore(&psd->lock, flags);
 
@@ -390,7 +417,6 @@ int pm_clk_resume(struct device *dev)
 	struct pm_subsys_data *psd = dev_to_psd(dev);
 	struct pm_clock_entry *ce;
 	unsigned long flags;
-	int ret;
 
	dev_dbg(dev, "%s()\n", __func__);
 
@@ -400,13 +426,8 @@ int pm_clk_resume(struct device *dev)
 
 	spin_lock_irqsave(&psd->lock, flags);
 
-	list_for_each_entry(ce, &psd->clock_list, node) {
-		if (ce->status < PCE_STATUS_ERROR) {
-			ret = __pm_clk_enable(dev, ce->clk);
-			if (!ret)
-				ce->status = PCE_STATUS_ENABLED;
-		}
-	}
+	list_for_each_entry(ce, &psd->clock_list, node)
+		__pm_clk_enable(dev, ce);
 
 	spin_unlock_irqrestore(&psd->lock, flags);
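For context, the new pm_clk_add_clk() is aimed at callers that already hold a struct clk pointer (for example, one looked up from the device tree) instead of a con_id that clk_get() can resolve. A minimal sketch of the intended calling convention follows; foo_probe() is a hypothetical driver probe, not part of this patch, and per the kernel-doc above the caller drops its own reference with clk_put() once the clock has been handed over:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_clock.h>

/* Hypothetical probe: hand a DT-provided clock over to PM clock management. */
static int foo_probe(struct platform_device *pdev)
{
	struct clk *clk;
	int ret;

	ret = pm_clk_create(&pdev->dev);
	if (ret)
		return ret;

	clk = of_clk_get(pdev->dev.of_node, 0);
	if (IS_ERR(clk)) {
		pm_clk_destroy(&pdev->dev);
		return PTR_ERR(clk);
	}

	ret = pm_clk_add_clk(&pdev->dev, clk);	/* takes its own reference */
	clk_put(clk);				/* drop the probe's reference */
	if (ret)
		pm_clk_destroy(&pdev->dev);

	return ret;
}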
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 89ced955faf..2d195f3a199 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -49,11 +49,12 @@
  *		are protected by the dev_opp_list_lock for integrity.
  *		IMPORTANT: the opp nodes should be maintained in increasing
  *		order.
+ * @dynamic:	not-created from static DT entries.
  * @available:	true/false - marks if this OPP as available or not
  * @rate:	Frequency in hertz
  * @u_volt:	Nominal voltage in microvolts corresponding to this OPP
  * @dev_opp:	points back to the device_opp struct this opp belongs to
- * @head:	RCU callback head used for deferred freeing
+ * @rcu_head:	RCU callback head used for deferred freeing
  *
  * This structure stores the OPP information for a given device.
  */
@@ -61,11 +62,12 @@ struct dev_pm_opp {
 	struct list_head node;
 
 	bool available;
+	bool dynamic;
 	unsigned long rate;
 	unsigned long u_volt;
 
 	struct device_opp *dev_opp;
-	struct rcu_head head;
+	struct rcu_head rcu_head;
 };
 
 /**
@@ -76,7 +78,8 @@ struct dev_pm_opp {
  * RCU usage: nodes are not modified in the list of device_opp,
  * however addition is possible and is secured by dev_opp_list_lock
  * @dev:	device pointer
- * @head:	notifier head to notify the OPP availability changes.
+ * @srcu_head:	notifier head to notify the OPP availability changes.
+ * @rcu_head:	RCU callback head used for deferred freeing
  * @opp_list:	list of opps
  *
  * This is an internal data structure maintaining the link to opps attached to
@@ -87,7 +90,8 @@ struct device_opp {
 	struct list_head node;
 
 	struct device *dev;
-	struct srcu_notifier_head head;
+	struct srcu_notifier_head srcu_head;
+	struct rcu_head rcu_head;
 	struct list_head opp_list;
 };
 
@@ -378,30 +382,8 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
-/**
- * dev_pm_opp_add() - Add an OPP table from a table definitions
- * @dev:	device for which we do this operation
- * @freq:	Frequency in Hz for this OPP
- * @u_volt:	Voltage in uVolts for this OPP
- *
- * This function adds an opp definition to the opp list and returns status.
- * The opp is made available by default and it can be controlled using
- * dev_pm_opp_enable/disable functions.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- *
- * Return:
- * 0:		On success OR
- *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST:	Freq are same and volt are different OR
- *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM:	Memory allocation failure
- */
-int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
+				  unsigned long u_volt, bool dynamic)
 {
 	struct device_opp *dev_opp = NULL;
 	struct dev_pm_opp *opp, *new_opp;
@@ -417,6 +399,13 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 	/* Hold our list modification lock here */
 	mutex_lock(&dev_opp_list_lock);
 
+	/* populate the opp table */
+	new_opp->dev_opp = dev_opp;
+	new_opp->rate = freq;
+	new_opp->u_volt = u_volt;
+	new_opp->available = true;
+	new_opp->dynamic = dynamic;
+
 	/* Check for existing list for 'dev' */
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
@@ -436,19 +425,15 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 		}
 
 		dev_opp->dev = dev;
-		srcu_init_notifier_head(&dev_opp->head);
+		srcu_init_notifier_head(&dev_opp->srcu_head);
 		INIT_LIST_HEAD(&dev_opp->opp_list);
 
 		/* Secure the device list modification */
 		list_add_rcu(&dev_opp->node, &dev_opp_list);
+		head = &dev_opp->opp_list;
+		goto list_add;
 	}
 
-	/* populate the opp table */
-	new_opp->dev_opp = dev_opp;
-	new_opp->rate = freq;
-	new_opp->u_volt = u_volt;
-	new_opp->available = true;
-
 	/*
 	 * Insert new OPP in order of increasing frequency
 	 * and discard if already present
@@ -474,6 +459,7 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 		return ret;
 	}
 
+list_add:
 	list_add_rcu(&new_opp->node, head);
 	mutex_unlock(&dev_opp_list_lock);
 
@@ -481,11 +467,109 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 	 * Notify the changes in the availability of the operable
 	 * frequency/voltage list.
 	 */
-	srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp);
+	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
 
 	return 0;
 }
+
+/**
+ * dev_pm_opp_add() - Add an OPP table from a table definitions
+ * @dev:	device for which we do this operation
+ * @freq:	Frequency in Hz for this OPP
+ * @u_volt:	Voltage in uVolts for this OPP
+ *
+ * This function adds an opp definition to the opp list and returns status.
+ * The opp is made available by default and it can be controlled using
+ * dev_pm_opp_enable/disable functions.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0:		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST:	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM:	Memory allocation failure
+ */
+int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+{
+	return dev_pm_opp_add_dynamic(dev, freq, u_volt, true);
+}
 EXPORT_SYMBOL_GPL(dev_pm_opp_add);
 
+static void kfree_opp_rcu(struct rcu_head *head)
+{
+	struct dev_pm_opp *opp = container_of(head, struct dev_pm_opp, rcu_head);
+
+	kfree_rcu(opp, rcu_head);
+}
+
+static void kfree_device_rcu(struct rcu_head *head)
+{
+	struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
+
+	kfree(device_opp);
+}
+
+void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp)
+{
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp);
+	list_del_rcu(&opp->node);
+	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, kfree_opp_rcu);
+
+	if (list_empty(&dev_opp->opp_list)) {
+		list_del_rcu(&dev_opp->node);
+		call_srcu(&dev_opp->srcu_head.srcu, &dev_opp->rcu_head,
+			  kfree_device_rcu);
+	}
+}
+
+/**
+ * dev_pm_opp_remove() - Remove an OPP from OPP list
+ * @dev:	device for which we do this operation
+ * @freq:	OPP to remove with matching 'freq'
+ *
+ * This function removes an opp from the opp list.
+ */
+void dev_pm_opp_remove(struct device *dev, unsigned long freq)
+{
+	struct dev_pm_opp *opp;
+	struct device_opp *dev_opp;
+	bool found = false;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		goto unlock;
+
+	list_for_each_entry(opp, &dev_opp->opp_list, node) {
+		if (opp->rate == freq) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		dev_warn(dev, "%s: Couldn't find OPP with freq: %lu\n",
+			 __func__, freq);
+		goto unlock;
+	}
+
+	__dev_pm_opp_remove(dev_opp, opp);
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
+
 /**
  * opp_set_availability() - helper to set the availability of an opp
  * @dev:	device for which we do this operation
@@ -557,14 +641,14 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
 
 	list_replace_rcu(&opp->node, &new_opp->node);
 	mutex_unlock(&dev_opp_list_lock);
-	kfree_rcu(opp, head);
+	call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, kfree_opp_rcu);
 
 	/* Notify the change of the OPP availability */
 	if (availability_req)
-		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ENABLE,
+		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ENABLE,
					 new_opp);
 	else
-		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE,
+		srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_DISABLE,
					 new_opp);
 
 	return 0;
@@ -629,7 +713,7 @@ struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev)
 	if (IS_ERR(dev_opp))
 		return ERR_CAST(dev_opp); /* matching type */
 
-	return &dev_opp->head;
+	return &dev_opp->srcu_head;
 }
 
 #ifdef CONFIG_OF
@@ -666,7 +750,7 @@ int of_init_opp_table(struct device *dev)
 		unsigned long freq = be32_to_cpup(val++) * 1000;
 		unsigned long volt = be32_to_cpup(val++);
 
-		if (dev_pm_opp_add(dev, freq, volt))
+		if (dev_pm_opp_add_dynamic(dev, freq, volt, false))
 			dev_warn(dev, "%s: Failed to add OPP %ld\n",
 				 __func__, freq);
 		nr -= 2;
@@ -675,4 +759,34 @@ int of_init_opp_table(struct device *dev)
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_init_opp_table);
+
+/**
+ * of_free_opp_table() - Free OPP table entries created from static DT entries
+ * @dev:	device pointer used to lookup device OPPs.
+ *
+ * Free OPPs created using static entries present in DT.
+ */
+void of_free_opp_table(struct device *dev)
+{
+	struct device_opp *dev_opp = find_device_opp(dev);
+	struct dev_pm_opp *opp, *tmp;
+
+	/* Check for existing list for 'dev' */
+	dev_opp = find_device_opp(dev);
+	if (WARN(IS_ERR(dev_opp), "%s: dev_opp: %ld\n", dev_name(dev),
+		 PTR_ERR(dev_opp)))
+		return;
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Free static OPPs */
+	list_for_each_entry_safe(opp, tmp, &dev_opp->opp_list, node) {
+		if (!opp->dynamic)
+			__dev_pm_opp_remove(dev_opp, opp);
+	}
+
+	mutex_unlock(&dev_opp_list_lock);
+}
+EXPORT_SYMBOL_GPL(of_free_opp_table);
 #endif
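The new dynamic flag is what keeps the two removal paths apart: dev_pm_opp_add() registers OPPs with dynamic set, while of_init_opp_table() now goes through dev_pm_opp_add_dynamic() with dynamic = false, so of_free_opp_table() tears down only the DT-created entries. A sketch of the resulting life cycle from a caller's point of view; the helpers, frequency, and voltage below are illustrative, not from the patch:

#include <linux/device.h>
#include <linux/pm_opp.h>

static int foo_setup_opps(struct device *dev)	/* hypothetical helper */
{
	int ret;

	/* Static OPPs parsed from the device tree (added with dynamic = false). */
	ret = of_init_opp_table(dev);
	if (ret)
		return ret;

	/* A runtime-discovered boost OPP on top of them (dynamic = true). */
	ret = dev_pm_opp_add(dev, 1200000000, 1100000);
	if (ret)
		dev_warn(dev, "boost OPP not added: %d\n", ret);

	return 0;
}

static void foo_free_opps(struct device *dev)	/* hypothetical helper */
{
	dev_pm_opp_remove(dev, 1200000000);	/* drops the dynamic OPP by rate */
	of_free_opp_table(dev);			/* frees only static, DT-created OPPs */
}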
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index a7c81b53d88..85727ef6ce8 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -17,7 +17,7 @@ if POWERCAP
 # Client driver configurations go here.
 config INTEL_RAPL
 	tristate "Intel RAPL Support"
-	depends on X86
+	depends on X86 && IOSF_MBI
 	default n
 	---help---
 	  This enables support for the Intel Running Average Power Limit (RAPL)
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 45e05b32f9b..c71443c4f26 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -29,6 +29,7 @@
 #include <linux/sysfs.h>
 #include <linux/cpu.h>
 #include <linux/powercap.h>
+#include <asm/iosf_mbi.h>
 
 #include <asm/processor.h>
 #include <asm/cpu_device_id.h>
@@ -70,11 +71,6 @@
 #define RAPL_PRIMITIVE_DERIVED	BIT(1)	/* not from raw data */
 #define RAPL_PRIMITIVE_DUMMY	BIT(2)
 
-/* scale RAPL units to avoid floating point math inside kernel */
-#define POWER_UNIT_SCALE	(1000000)
-#define ENERGY_UNIT_SCALE	(1000000)
-#define TIME_UNIT_SCALE		(1000000)
-
 #define TIME_WINDOW_MAX_MSEC 40000
 #define TIME_WINDOW_MIN_MSEC 250
 
@@ -175,9 +171,9 @@ struct rapl_package {
 	unsigned int id;	/* physical package/socket id */
 	unsigned int nr_domains;
 	unsigned long domain_map;	/* bit map of active domains */
-	unsigned int power_unit_divisor;
-	unsigned int energy_unit_divisor;
-	unsigned int time_unit_divisor;
+	unsigned int power_unit;
+	unsigned int energy_unit;
+	unsigned int time_unit;
 	struct rapl_domain *domains;	/* array of domains, sized at runtime */
 	struct powercap_zone *power_zone;	/* keep track of parent zone */
 	int nr_cpus;	/* active cpus on the package, topology info is lost during
@@ -188,6 +184,18 @@ struct rapl_package {
 			 */
 	struct list_head plist;
 };
+
+struct rapl_defaults {
+	int (*check_unit)(struct rapl_package *rp, int cpu);
+	void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
+	u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
+				   bool to_raw);
+};
+static struct rapl_defaults *rapl_defaults;
+
+/* Sideband MBI registers */
+#define IOSF_CPU_POWER_BUDGET_CTL	(0x2)
+
 #define PACKAGE_PLN_INT_SAVED	BIT(0)
 #define MAX_PRIM_NAME	(32)
 
@@ -339,23 +347,13 @@ static int find_nr_power_limit(struct rapl_domain *rd)
 static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
 {
 	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
-	int nr_powerlimit;
 
 	if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
 		return -EACCES;
+
 	get_online_cpus();
-	nr_powerlimit = find_nr_power_limit(rd);
-	/* here we activate/deactivate the hardware for power limiting */
 	rapl_write_data_raw(rd, PL1_ENABLE, mode);
-	/* always enable clamp such that p-state can go below OS requested
-	 * range. power capping priority over guranteed frequency.
-	 */
-	rapl_write_data_raw(rd, PL1_CLAMP, mode);
-	/* some domains have pl2 */
-	if (nr_powerlimit > 1) {
-		rapl_write_data_raw(rd, PL2_ENABLE, mode);
-		rapl_write_data_raw(rd, PL2_CLAMP, mode);
-	}
+	rapl_defaults->set_floor_freq(rd, mode);
 	put_online_cpus();
 
 	return 0;
@@ -653,9 +651,7 @@ static void rapl_init_domains(struct rapl_package *rp)
 static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
 			   int to_raw)
 {
-	u64 divisor = 1;
-	int scale = 1;	/* scale to user friendly data without floating point */
-	u64 f, y;	/* fraction and exp. used for time unit */
+	u64 units = 1;
 	struct rapl_package *rp;
 
 	rp = find_package_by_id(package);
@@ -664,42 +660,24 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
 
 	switch (type) {
 	case POWER_UNIT:
-		divisor = rp->power_unit_divisor;
-		scale = POWER_UNIT_SCALE;
+		units = rp->power_unit;
 		break;
 	case ENERGY_UNIT:
-		scale = ENERGY_UNIT_SCALE;
-		divisor = rp->energy_unit_divisor;
+		units = rp->energy_unit;
 		break;
 	case TIME_UNIT:
-		divisor = rp->time_unit_divisor;
-		scale = TIME_UNIT_SCALE;
-		/* special processing based on 2^Y*(1+F)/4 = val/divisor, refer
-		 * to Intel Software Developer's manual Vol. 3a, CH 14.7.4.
-		 */
-		if (!to_raw) {
-			f = (value & 0x60) >> 5;
-			y = value & 0x1f;
-			value = (1 << y) * (4 + f) * scale / 4;
-			return div64_u64(value, divisor);
-		} else {
-			do_div(value, scale);
-			value *= divisor;
-			y = ilog2(value);
-			f = div64_u64(4 * (value - (1 << y)), 1 << y);
-			value = (y & 0x1f) | ((f & 0x3) << 5);
-			return value;
-		}
-		break;
+		return rapl_defaults->compute_time_window(rp, value, to_raw);
 	case ARBITRARY_UNIT:
 	default:
 		return value;
 	};
 
 	if (to_raw)
-		return div64_u64(value * divisor, scale);
-	else
-		return div64_u64(value * scale, divisor);
+		return div64_u64(value, units);
+
+	value *= units;
+
+	return value;
 }
 
 /* in the order of enum rapl_primitives */
@@ -833,12 +811,18 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
 	return 0;
 }
 
-static const struct x86_cpu_id energy_unit_quirk_ids[] = {
-	{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
-	{}
-};
-
-static int rapl_check_unit(struct rapl_package *rp, int cpu)
+/*
+ * Raw RAPL data stored in MSRs are in certain scales. We need to
+ * convert them into standard units based on the units reported in
+ * the RAPL unit MSRs. This is specific to CPUs as the method to
+ * calculate units differ on different CPUs.
+ * We convert the units to below format based on CPUs.
+ * i.e.
+ * energy unit: microJoules  : Represented in microJoules by default
+ * power unit : microWatts   : Represented in milliWatts by default
+ * time unit  : microseconds : Represented in seconds by default
+ */
+static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
 {
 	u64 msr_val;
 	u32 value;
@@ -849,36 +833,47 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu)
 		return -ENODEV;
 	}
 
-	/* Raw RAPL data stored in MSRs are in certain scales. We need to
-	 * convert them into standard units based on the divisors reported in
-	 * the RAPL unit MSRs.
-	 * i.e.
-	 * energy unit: 1/enery_unit_divisor Joules
-	 * power unit: 1/power_unit_divisor Watts
-	 * time unit: 1/time_unit_divisor Seconds
-	 */
 	value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
-	/* some CPUs have different way to calculate energy unit */
-	if (x86_match_cpu(energy_unit_quirk_ids))
-		rp->energy_unit_divisor = 1000000 / (1 << value);
-	else
-		rp->energy_unit_divisor = 1 << value;
+	rp->energy_unit = 1000000 / (1 << value);
 
 	value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
-	rp->power_unit_divisor = 1 << value;
+	rp->power_unit = 1000000 / (1 << value);
 
 	value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
-	rp->time_unit_divisor = 1 << value;
+	rp->time_unit = 1000000 / (1 << value);
 
-	pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n",
-		rp->id,
-		rp->energy_unit_divisor,
-		rp->time_unit_divisor,
-		rp->power_unit_divisor);
+	pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n",
+		rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
 
 	return 0;
 }
+
+static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
+{
+	u64 msr_val;
+	u32 value;
+
+	if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) {
+		pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n",
+			MSR_RAPL_POWER_UNIT, cpu);
+		return -ENODEV;
+	}
+	value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
+	rp->energy_unit = 1 << value;
+
+	value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
+	rp->power_unit = (1 << value) * 1000;
+
+	value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
+	rp->time_unit = 1000000 / (1 << value);
+
+	pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n",
+		rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
+
+	return 0;
+}
+
 
 /* REVISIT:
  * When package power limit is set artificially low by RAPL, LVT
 * thermal interrupt for package power limit should be ignored
@@ -946,16 +941,107 @@ static void package_power_limit_irq_restore(int package_id)
 	wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
 }
 
+static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
+{
+	int nr_powerlimit = find_nr_power_limit(rd);
+
+	/* always enable clamp such that p-state can go below OS requested
+	 * range. power capping priority over guranteed frequency.
+	 */
+	rapl_write_data_raw(rd, PL1_CLAMP, mode);
+
+	/* some domains have pl2 */
+	if (nr_powerlimit > 1) {
+		rapl_write_data_raw(rd, PL2_ENABLE, mode);
+		rapl_write_data_raw(rd, PL2_CLAMP, mode);
+	}
+}
+
+static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
+{
+	static u32 power_ctrl_orig_val;
+	u32 mdata;
+
+	if (!power_ctrl_orig_val)
+		iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
+			IOSF_CPU_POWER_BUDGET_CTL, &power_ctrl_orig_val);
+	mdata = power_ctrl_orig_val;
+	if (enable) {
+		mdata &= ~(0x7f << 8);
+		mdata |= 1 << 8;
+	}
+	iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
+		IOSF_CPU_POWER_BUDGET_CTL, mdata);
+}
+
+static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
+					bool to_raw)
+{
+	u64 f, y;	/* fraction and exp. used for time unit */
+
+	/*
+	 * Special processing based on 2^Y*(1+F/4), refer
+	 * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
+	 */
+	if (!to_raw) {
+		f = (value & 0x60) >> 5;
+		y = value & 0x1f;
+		value = (1 << y) * (4 + f) * rp->time_unit / 4;
+	} else {
+		do_div(value, rp->time_unit);
+		y = ilog2(value);
+		f = div64_u64(4 * (value - (1 << y)), 1 << y);
+		value = (y & 0x1f) | ((f & 0x3) << 5);
+	}
+	return value;
+}
+
+static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
+					bool to_raw)
+{
+	/*
+	 * Atom time unit encoding is straight forward val * time_unit,
+	 * where time_unit is default to 1 sec. Never 0.
+	 */
+	if (!to_raw)
+		return (value) ? value *= rp->time_unit : rp->time_unit;
+	else
+		value = div64_u64(value, rp->time_unit);
+
+	return value;
+}
+
+static const struct rapl_defaults rapl_defaults_core = {
+	.check_unit = rapl_check_unit_core,
+	.set_floor_freq = set_floor_freq_default,
+	.compute_time_window = rapl_compute_time_window_core,
+};
+
+static const struct rapl_defaults rapl_defaults_atom = {
+	.check_unit = rapl_check_unit_atom,
+	.set_floor_freq = set_floor_freq_atom,
+	.compute_time_window = rapl_compute_time_window_atom,
+};
+
+#define RAPL_CPU(_model, _ops) {			\
+		.vendor = X86_VENDOR_INTEL,		\
+		.family = 6,				\
+		.model = _model,			\
+		.driver_data = (kernel_ulong_t)&_ops,	\
+		}
+
 static const struct x86_cpu_id rapl_ids[] = {
-	{ X86_VENDOR_INTEL, 6, 0x2a},/* Sandy Bridge */
-	{ X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */
-	{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
-	{ X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */
-	{ X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */
-	{ X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */
-	{ X86_VENDOR_INTEL, 6, 0x3f},/* Haswell */
-	{ X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */
-	/* TODO: Add more CPU IDs after testing */
+	RAPL_CPU(0x2a, rapl_defaults_core),/* Sandy Bridge */
+	RAPL_CPU(0x2d, rapl_defaults_core),/* Sandy Bridge EP */
+	RAPL_CPU(0x37, rapl_defaults_atom),/* Valleyview */
+	RAPL_CPU(0x3a, rapl_defaults_core),/* Ivy Bridge */
+	RAPL_CPU(0x3c, rapl_defaults_core),/* Haswell */
+	RAPL_CPU(0x3d, rapl_defaults_core),/* Broadwell */
+	RAPL_CPU(0x3f, rapl_defaults_core),/* Haswell */
+	RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
+	RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
+	RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
+	RAPL_CPU(0x5A, rapl_defaults_atom),/* Annidale */
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
@@ -1241,7 +1327,7 @@ static int rapl_detect_topology(void)
 
 		/* check if the package contains valid domains */
 		if (rapl_detect_domains(new_package, i) ||
-			rapl_check_unit(new_package, i)) {
+			rapl_defaults->check_unit(new_package, i)) {
 			kfree(new_package->domains);
 			kfree(new_package);
 			/* free up the packages already initialized */
@@ -1296,7 +1382,7 @@ static int rapl_add_package(int cpu)
 	rp->nr_cpus = 1;
 	/* check if the package contains valid domains */
 	if (rapl_detect_domains(rp, cpu) ||
-		rapl_check_unit(rp, cpu)) {
+		rapl_defaults->check_unit(rp, cpu)) {
 		ret = -ENODEV;
 		goto err_free_package;
 	}
@@ -1358,14 +1444,18 @@ static struct notifier_block rapl_cpu_notifier = {
 static int __init rapl_init(void)
 {
 	int ret = 0;
+	const struct x86_cpu_id *id;
 
-	if (!x86_match_cpu(rapl_ids)) {
+	id = x86_match_cpu(rapl_ids);
+	if (!id) {
 		pr_err("driver does not support CPU family %d model %d\n",
 			boot_cpu_data.x86,
 			boot_cpu_data.x86_model);
 		return -ENODEV;
 	}
 
+	rapl_defaults = (struct rapl_defaults *)id->driver_data;
+
 	cpu_notifier_register_begin();
 
 	/* prevent CPU hotplug during detection */
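With a single per-package unit replacing the old divisor/scale pair, rapl_unit_xlate() reduces to value / unit toward raw register values and value * unit toward user-visible microjoules, microwatts and microseconds; only the time window keeps a model-specific encoding, dispatched through rapl_defaults. A worked example of both unit models and of the core time-window formula 2^Y*(1+F/4)*time_unit, written as a small standalone program; the MSR field values here are illustrative, not read from the patch or from real hardware:

#include <stdio.h>

int main(void)
{
	/* Illustrative MSR_RAPL_POWER_UNIT fields, not real hardware reads. */
	unsigned int energy_field = 14, time_field = 10;

	/* Core model: units are fractions of a joule/second. */
	unsigned int energy_unit = 1000000 / (1 << energy_field);  /* 61 uJ */
	unsigned int time_unit = 1000000 / (1 << time_field);      /* 976 us */

	/* Atom model: the energy unit is a plain multiple of 1 uJ. */
	unsigned int atom_energy_unit = 1 << 5;                    /* 32 uJ */

	/* Core time window for raw byte 0x4a: F = 2, Y = 10. */
	unsigned int f = (0x4a & 0x60) >> 5, y = 0x4a & 0x1f;
	unsigned long long window = (1ULL << y) * (4 + f) * time_unit / 4;

	printf("core energy_unit=%u uJ, atom energy_unit=%u uJ\n",
	       energy_unit, atom_energy_unit);
	printf("time window = %llu us (~1.5 s)\n", window);  /* 1499136 us */
	return 0;
}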
diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 8348866e7b0..0b003963441 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h
@@ -18,6 +18,8 @@ struct pm_clk_notifier_block {
 	char *con_ids[];
 };
 
+struct clk;
+
 #ifdef CONFIG_PM_CLK
 static inline bool pm_clk_no_clocks(struct device *dev)
 {
@@ -29,6 +31,7 @@ extern void pm_clk_init(struct device *dev);
 extern int pm_clk_create(struct device *dev);
 extern void pm_clk_destroy(struct device *dev);
 extern int pm_clk_add(struct device *dev, const char *con_id);
+extern int pm_clk_add_clk(struct device *dev, struct clk *clk);
 extern void pm_clk_remove(struct device *dev, const char *con_id);
 extern int pm_clk_suspend(struct device *dev);
 extern int pm_clk_resume(struct device *dev);
@@ -51,6 +54,11 @@ static inline int pm_clk_add(struct device *dev, const char *con_id)
 {
 	return -EINVAL;
 }
+
+static inline int pm_clk_add_clk(struct device *dev, struct clk *clk)
+{
+	return -EINVAL;
+}
 static inline void pm_clk_remove(struct device *dev, const char *con_id)
 {
 }
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 0330217abfa..cec2d454091 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -21,7 +21,7 @@ struct dev_pm_opp;
 struct device;
 
 enum dev_pm_opp_event {
-	OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
+	OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
 };
 
 #if defined(CONFIG_PM_OPP)
@@ -44,6 +44,7 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 
 int dev_pm_opp_add(struct device *dev, unsigned long freq,
 		   unsigned long u_volt);
+void dev_pm_opp_remove(struct device *dev, unsigned long freq);
 
 int dev_pm_opp_enable(struct device *dev, unsigned long freq);
 
@@ -90,6 +91,10 @@ static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
 	return -EINVAL;
 }
 
+static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq)
+{
+}
+
 static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq)
 {
 	return 0;
@@ -109,11 +114,16 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
 int of_init_opp_table(struct device *dev);
+void of_free_opp_table(struct device *dev);
 #else
 static inline int of_init_opp_table(struct device *dev)
 {
 	return -EINVAL;
 }
+
+static inline void of_free_opp_table(struct device *dev)
+{
+}
 #endif
 
 #endif		/* __LINUX_OPP_H__ */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index bbef57f5bdf..1eb7da7bc8e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -308,4 +308,3 @@ config PM_GENERIC_DOMAINS_OF
 
 config CPU_PM
 	bool
-	depends on SUSPEND || CPU_IDLE
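One consequence of the pm_opp.h change worth noting: OPP_EVENT_REMOVE is inserted into the middle of dev_pm_opp_event, so notifier callbacks registered through dev_pm_opp_get_notifier() should be prepared to see the new event. A minimal sketch of such a listener; foo_opp_notifier() is hypothetical and the reactions in the comments are placeholders, not part of this patch:

#include <linux/notifier.h>
#include <linux/pm_opp.h>
#include <linux/printk.h>

static int foo_opp_notifier(struct notifier_block *nb,
			    unsigned long event, void *data)
{
	struct dev_pm_opp *opp = data;	/* the OPP the event refers to */

	switch (event) {
	case OPP_EVENT_ADD:
	case OPP_EVENT_ENABLE:
		/* re-evaluate the set of usable frequencies */
		break;
	case OPP_EVENT_REMOVE:		/* new with this patch */
	case OPP_EVENT_DISABLE:
		pr_debug("opp %p removed or disabled\n", opp);
		/* drop any cached reference to this OPP */
		break;
	}

	return NOTIFY_OK;
}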