From c0a86a9bea55d505574120f3e9775e3844276505 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Tue, 19 Apr 2011 16:35:15 -0700 Subject: x86/PCI: irq and pci_ids patch for Intel Panther Point DeviceIDs This patch adds the LPC Controller DeviceIDs for the Intel Panther Point PCH. Acked-by: Jean Delvare Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4e2c9150a78..52f4ed4de49 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2480,6 +2480,9 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS 0x1e22 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 -- cgit v1.2.3-70-g09d2 From b48d4425b602f5f4978299474743dbea130d940d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 19 Oct 2010 13:07:57 -0700 Subject: PCI: add ID-based ordering enable/disable support Add support to allow drivers to enable/disable ID-based ordering. Where supported, ID-based ordering can significantly improve the latency of individual requests by preventing them from queueing up behind unrelated traffic. Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 13 ++++++++++++ include/linux/pci_regs.h | 2 ++ 3 files changed, 68 insertions(+) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 44d1c7c3876..d0182bed7ac 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1834,6 +1834,59 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +/** + * pci_enable_ido - enable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to enable + * + * Enable ID-based ordering on @dev. @type can contain the bits + * %PCI_EXP_IDO_REQUEST and/or %PCI_EXP_IDO_COMPLETION to indicate + * which types of transactions are allowed to be re-ordered. + */ +void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl |= PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl |= PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_enable_ido); + +/** + * pci_disable_ido - disable ID-based ordering on a device + * @dev: the PCI device + * @type: which types of IDO to disable + */ +void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (type & PCI_EXP_IDO_REQUEST) + ctrl &= ~PCI_EXP_IDO_REQ_EN; + if (type & PCI_EXP_IDO_COMPLETION) + ctrl &= ~PCI_EXP_IDO_CMP_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ido); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 96f70d7e058..551ddcb5f94 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -828,6 +828,11 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return __pci_enable_wake(dev, state, false, enable); } +#define PCI_EXP_IDO_REQUEST (1<<0) +#define PCI_EXP_IDO_COMPLETION (1<<1) +void pci_enable_ido(struct pci_dev *dev, unsigned long type); +void pci_disable_ido(struct pci_dev *dev, unsigned long type); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1207,6 +1212,14 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, return 0; } +static inline void pci_enable_ido(struct pci_dev *dev, unsigned long type) +{ +} + +static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index be01380f798..d9acf9b9981 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -510,6 +510,8 @@ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ +#define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -- cgit v1.2.3-70-g09d2 From 48a92a8179b3e677fac07db7bd109e68f020468c Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 10 Jan 2011 12:46:36 -0800 Subject: PCI: add OBFF enable/disable support OBFF (optimized buffer flush/fill), where supported, can help improve energy efficiency by giving devices information about when interrupts and other activity will have a reduced power impact. It requires support from both the device and system (i.e. not only does the device need to respond to OBFF messages, but the platform must be capable of generating and routing them to the end point). Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 16 +++++++++ include/linux/pci_regs.h | 6 ++++ 3 files changed, 114 insertions(+) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d0182bed7ac..01e4cab2e5c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1887,6 +1887,98 @@ void pci_disable_ido(struct pci_dev *dev, unsigned long type) } EXPORT_SYMBOL(pci_disable_ido); +/** + * pci_enable_obff - enable optimized buffer flush/fill + * @dev: PCI device + * @type: type of signaling to use + * + * Try to enable @type OBFF signaling on @dev. It will try using WAKE# + * signaling if possible, falling back to message signaling only if + * WAKE# isn't supported. @type should indicate whether the PCIe link + * be brought out of L0s or L1 to send the message. It should be either + * %PCI_EXP_OBFF_SIGNAL_ALWAYS or %PCI_OBFF_SIGNAL_L0. + * + * If your device can benefit from receiving all messages, even at the + * power cost of bringing the link back up from a low power state, use + * %PCI_EXP_OBFF_SIGNAL_ALWAYS. Otherwise, use %PCI_OBFF_SIGNAL_L0 (the + * preferred type). + * + * RETURNS: + * Zero on success, appropriate error number on failure. + */ +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type) +{ + int pos; + u32 cap; + u16 ctrl; + int ret; + + if (!pci_is_pcie(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + if (!(cap & PCI_EXP_OBFF_MASK)) + return -ENOTSUPP; /* no OBFF support at all */ + + /* Make sure the topology supports OBFF as well */ + if (dev->bus) { + ret = pci_enable_obff(dev->bus->self, type); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + if (cap & PCI_EXP_OBFF_WAKE) + ctrl |= PCI_EXP_OBFF_WAKE_EN; + else { + switch (type) { + case PCI_EXP_OBFF_SIGNAL_L0: + if (!(ctrl & PCI_EXP_OBFF_WAKE_EN)) + ctrl |= PCI_EXP_OBFF_MSGA_EN; + break; + case PCI_EXP_OBFF_SIGNAL_ALWAYS: + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + ctrl |= PCI_EXP_OBFF_MSGB_EN; + break; + default: + WARN(1, "bad OBFF signal type\n"); + return -ENOTSUPP; + } + } + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_obff); + +/** + * pci_disable_obff - disable optimized buffer flush/fill + * @dev: PCI device + * + * Disable OBFF on @dev. + */ +void pci_disable_obff(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_is_pcie(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_OBFF_WAKE_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_obff); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 551ddcb5f94..45a035cccd9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -833,6 +833,13 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, void pci_enable_ido(struct pci_dev *dev, unsigned long type); void pci_disable_ido(struct pci_dev *dev, unsigned long type); +enum pci_obff_signal_type { + PCI_EXP_OBFF_SIGNAL_L0, + PCI_EXP_OBFF_SIGNAL_ALWAYS, +}; +int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); +void pci_disable_obff(struct pci_dev *dev); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); @@ -1220,6 +1227,15 @@ static inline void pci_disable_ido(struct pci_dev *dev, unsigned long type) { } +static inline int pci_enable_obff(struct pci_dev *dev, unsigned long type) +{ + return 0; +} + +static inline void pci_disable_obff(struct pci_dev *dev) +{ +} + static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index d9acf9b9981..aa420261843 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,10 +508,16 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ +#define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ +#define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ +#define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ +#define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ -- cgit v1.2.3-70-g09d2 From 51c2e0a7e5bc7ed1384cc68cfb95e702571500c9 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 14 Jan 2011 08:53:04 -0800 Subject: PCI: add latency tolerance reporting enable/disable support Latency tolerance reporting allows devices to send messages to the root complex indicating their latency tolerance for snooped & unsnooped memory transactions. Add support for enabling & disabling this feature, along with a routine to set the max latencies a device should send upstream. Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 5 ++ include/linux/pci_regs.h | 9 +++ 3 files changed, 163 insertions(+) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 01e4cab2e5c..53302cbdb94 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1979,6 +1979,155 @@ void pci_disable_obff(struct pci_dev *dev) } EXPORT_SYMBOL(pci_disable_obff); +/** + * pci_ltr_supported - check whether a device supports LTR + * @dev: PCI device + * + * RETURNS: + * True if @dev supports latency tolerance reporting, false otherwise. + */ +bool pci_ltr_supported(struct pci_dev *dev) +{ + int pos; + u32 cap; + + if (!pci_is_pcie(dev)) + return false; + + pos = pci_pcie_cap(dev); + if (!pos) + return false; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, &cap); + + return cap & PCI_EXP_DEVCAP2_LTR; +} +EXPORT_SYMBOL(pci_ltr_supported); + +/** + * pci_enable_ltr - enable latency tolerance reporting + * @dev: PCI device + * + * Enable LTR on @dev if possible, which means enabling it first on + * upstream ports. + * + * RETURNS: + * Zero on success, errno on failure. + */ +int pci_enable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + int ret; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + pos = pci_pcie_cap(dev); + if (!pos) + return -ENOTSUPP; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return -EINVAL; + + /* Enable upstream ports first */ + if (dev->bus) { + ret = pci_enable_ltr(dev->bus->self); + if (ret) + return ret; + } + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl |= PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + return 0; +} +EXPORT_SYMBOL(pci_enable_ltr); + +/** + * pci_disable_ltr - disable latency tolerance reporting + * @dev: PCI device + */ +void pci_disable_ltr(struct pci_dev *dev) +{ + int pos; + u16 ctrl; + + if (!pci_ltr_supported(dev)) + return; + + pos = pci_pcie_cap(dev); + if (!pos) + return; + + /* Only primary function can enable/disable LTR */ + if (PCI_FUNC(dev->devfn) != 0) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &ctrl); + ctrl &= ~PCI_EXP_LTR_EN; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); +} +EXPORT_SYMBOL(pci_disable_ltr); + +static int __pci_ltr_scale(int *val) +{ + int scale = 0; + + while (*val > 1023) { + *val = (*val + 31) / 32; + scale++; + } + return scale; +} + +/** + * pci_set_ltr - set LTR latency values + * @dev: PCI device + * @snoop_lat_ns: snoop latency in nanoseconds + * @nosnoop_lat_ns: nosnoop latency in nanoseconds + * + * Figure out the scale and set the LTR values accordingly. + */ +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns) +{ + int pos, ret, snoop_scale, nosnoop_scale; + u16 val; + + if (!pci_ltr_supported(dev)) + return -ENOTSUPP; + + snoop_scale = __pci_ltr_scale(&snoop_lat_ns); + nosnoop_scale = __pci_ltr_scale(&nosnoop_lat_ns); + + if (snoop_lat_ns > PCI_LTR_VALUE_MASK || + nosnoop_lat_ns > PCI_LTR_VALUE_MASK) + return -EINVAL; + + if ((snoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT)) || + (nosnoop_scale > (PCI_LTR_SCALE_MASK >> PCI_LTR_SCALE_SHIFT))) + return -EINVAL; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!pos) + return -ENOTSUPP; + + val = (snoop_scale << PCI_LTR_SCALE_SHIFT) | snoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_SNOOP_LAT, val); + if (ret != 4) + return -EIO; + + val = (nosnoop_scale << PCI_LTR_SCALE_SHIFT) | nosnoop_lat_ns; + ret = pci_write_config_word(dev, pos + PCI_LTR_MAX_NOSNOOP_LAT, val); + if (ret != 4) + return -EIO; + + return 0; +} +EXPORT_SYMBOL(pci_set_ltr); + static int pci_acs_enable; /** diff --git a/include/linux/pci.h b/include/linux/pci.h index 45a035cccd9..df4d69b8214 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -840,6 +840,11 @@ enum pci_obff_signal_type { int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type); void pci_disable_obff(struct pci_dev *dev); +bool pci_ltr_supported(struct pci_dev *dev); +int pci_enable_ltr(struct pci_dev *dev); +void pci_disable_ltr(struct pci_dev *dev); +int pci_set_ltr(struct pci_dev *dev, int snoop_lat_ns, int nosnoop_lat_ns); + /* For use by arch with custom probe code */ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index aa420261843..e8840964aca 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -508,6 +508,7 @@ #define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ #define PCI_EXP_DEVCAP2_ARI 0x20 /* Alternative Routing-ID */ +#define PCI_EXP_DEVCAP2_LTR 0x800 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MASK 0xc0000 /* OBFF support mechanism */ #define PCI_EXP_OBFF_MSG 0x40000 /* New message signaling */ #define PCI_EXP_OBFF_WAKE 0x80000 /* Re-use WAKE# for OBFF */ @@ -515,6 +516,7 @@ #define PCI_EXP_DEVCTL2_ARI 0x20 /* Alternative Routing-ID */ #define PCI_EXP_IDO_REQ_EN 0x100 /* ID-based ordering request enable */ #define PCI_EXP_IDO_CMP_EN 0x200 /* ID-based ordering completion enable */ +#define PCI_EXP_LTR_EN 0x400 /* Latency tolerance reporting */ #define PCI_EXP_OBFF_MSGA_EN 0x2000 /* OBFF enable with Message type A */ #define PCI_EXP_OBFF_MSGB_EN 0x4000 /* OBFF enable with Message type B */ #define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ @@ -535,6 +537,7 @@ #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 #define PCI_EXT_CAP_ID_SRIOV 16 +#define PCI_EXT_CAP_ID_LTR 24 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -691,6 +694,12 @@ #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +#define PCI_LTR_MAX_SNOOP_LAT 0x4 +#define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +#define PCI_LTR_VALUE_MASK 0x000003ff +#define PCI_LTR_SCALE_MASK 0x00001c00 +#define PCI_LTR_SCALE_SHIFT 10 + /* Access Control Service */ #define PCI_ACS_CAP 0x04 /* ACS Capability Register */ #define PCI_ACS_SV 0x01 /* Source Validation */ -- cgit v1.2.3-70-g09d2 From 9f728f53dd70396f3183d2f0861022259471824b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 12 May 2011 17:11:47 -0700 Subject: PCI/e1000e: Add and use pci_disable_link_state_locked() Need to use it in _e1000e_disable_aspm. This routine is used for error recovery, where the pci_bus_sem is already held, and we don't want pci_disable_link_state to try to take it again. So add a locked variant for use in cases like this. Found lock up: [ 2374.654557] kworker/32:1 D ffff881027f6b0f0 0 6075 2 0x00000000 [ 2374.654816] ffff88503f099a68 0000000000000046 ffff88503f098000 0000000000004000 [ 2374.654837] 00000000001d1ec0 ffff88503f099fd8 00000000001d1ec0 ffff88503f099fd8 [ 2374.654860] 0000000000004000 00000000001d1ec0 ffff88503dcc8000 ffff88503f090000 [ 2374.654880] Call Trace: [ 2374.654898] [] ? __lock_acquired+0x3a/0x224 [ 2374.654914] [] ? _raw_spin_unlock_irq+0x30/0x36 [ 2374.654925] [] ? trace_hardirqs_on_caller+0x1f/0x178 [ 2374.654936] [] rwsem_down_failed_common+0xd3/0x103 [ 2374.654945] [] ? __lock_contended+0x3a/0x2a2 [ 2374.654955] [] rwsem_down_read_failed+0x12/0x14 [ 2374.654967] [] call_rwsem_down_read_failed+0x14/0x30 [ 2374.654981] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.654990] [] ? down_read+0x7e/0x91 [ 2374.654999] [] ? pci_disable_link_state+0x5f/0xf5 [ 2374.655008] [] pci_disable_link_state+0x5f/0xf5 [ 2374.655024] [] e1000e_disable_aspm+0x55/0x5a [ 2374.655037] [] e1000_io_slot_reset+0x59/0xea [ 2374.655048] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655057] [] report_slot_reset+0x2e/0x5d [ 2374.655072] [] pci_walk_bus+0x8a/0xb7 [ 2374.655081] [] ? report_mmio_enabled+0x5d/0x5d [ 2374.655091] [] broadcast_error_message+0xa4/0xb2 [ 2374.655101] [] ? pci_bus_read_config_dword+0x72/0x80 [ 2374.655110] [] do_recovery+0x9e/0xf9 [ 2374.655120] [] handle_error_source+0x4c/0x51 [ 2374.655129] [] aer_isr_one_error+0x1e9/0x21a [ 2374.655138] [] aer_isr+0xc7/0xcc [ 2374.655147] [] ? aer_isr_one_error+0x21a/0x21a [ 2374.655159] [] process_one_work+0x237/0x3ec [ 2374.655168] [] ? process_one_work+0x1a8/0x3ec [ 2374.655178] [] worker_thread+0x17c/0x240 [ 2374.655186] [] ? trace_hardirqs_on+0xd/0xf [ 2374.655196] [] ? manage_workers+0xab/0xab [ 2374.655209] [] kthread+0xa0/0xa8 [ 2374.655223] [] kernel_thread_helper+0x4/0x10 [ 2374.655232] [] ? retint_restore_args+0xe/0xe [ 2374.655243] [] ? __init_kthread_worker+0x5b/0x5b [ 2374.655252] [] ? gs_change+0xb/0xb when aer happens, pci_walk_bus already have down_read(&pci_bus_sem)... then report_slot_reset ==> e1000_io_slot_reset ==> e1000e_disable_aspm ==> pci_disable_link_state... We can not use pci_disable_link_state, and it will try to hold pci_bus_sem again. Try to have __pci_disable_link_state that will not need to hold pci_bus_sem. -v2: change name to pci_disable_link_state_locked() according to Jesse. [jbarnes: make sure new function is exported for modules] Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/net/e1000e/netdev.c | 2 +- drivers/pci/pcie/aspm.c | 19 ++++++++++++++++--- include/linux/pci-aspm.h | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 506a0a0043b..5fb43f098f1 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -5347,7 +5347,7 @@ static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, #ifdef CONFIG_PCIEASPM static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) { - pci_disable_link_state(pdev, state); + pci_disable_link_state_locked(pdev, state); } #else static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 3eb667b2478..6892601fc76 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -734,7 +734,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev) * pci_disable_link_state - disable pci device's link state, so the link will * never enter specific states */ -void pci_disable_link_state(struct pci_dev *pdev, int state) +static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem) { struct pci_dev *parent = pdev->bus->self; struct pcie_link_state *link; @@ -747,7 +747,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) if (!parent || !parent->link_state) return; - down_read(&pci_bus_sem); + if (sem) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link = parent->link_state; if (state & PCIE_LINK_STATE_L0S) @@ -761,7 +762,19 @@ void pci_disable_link_state(struct pci_dev *pdev, int state) pcie_set_clkpm(link, 0); } mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (sem) + up_read(&pci_bus_sem); +} + +void pci_disable_link_state_locked(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, false); +} +EXPORT_SYMBOL(pci_disable_link_state_locked); + +void pci_disable_link_state(struct pci_dev *pdev, int state) +{ + __pci_disable_link_state(pdev, state, true); } EXPORT_SYMBOL(pci_disable_link_state); diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index 67cb3ae3801..7cea7b6c141 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -28,6 +28,7 @@ extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev); extern void pci_disable_link_state(struct pci_dev *pdev, int state); +extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state); extern void pcie_clear_aspm(void); extern void pcie_no_aspm(void); #else -- cgit v1.2.3-70-g09d2 From 24a4742f0be6226eb0106fbb17caf4d711d1ad43 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:11 -0600 Subject: PCI: Track the size of each saved capability data area This will allow us to store and load it later. Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 12 +++++++----- include/linux/pci.h | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 53302cbdb94..d6e5b8ea919 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -830,7 +830,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) dev_err(&dev->dev, "buffer not found in %s\n", __func__); return -ENOMEM; } - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -863,7 +863,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); @@ -899,7 +899,8 @@ static int pci_save_pcix_state(struct pci_dev *dev) return -ENOMEM; } - pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data); + pci_read_config_word(dev, pos + PCI_X_CMD, + (u16 *)save_state->cap.data); return 0; } @@ -914,7 +915,7 @@ static void pci_restore_pcix_state(struct pci_dev *dev) pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (!save_state || pos <= 0) return; - cap = (u16 *)&save_state->data[0]; + cap = (u16 *)&save_state->cap.data[0]; pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]); } @@ -1771,7 +1772,8 @@ static int pci_add_cap_save_buffer( if (!save_state) return -ENOMEM; - save_state->cap_nr = cap; + save_state->cap.cap_nr = cap; + save_state->cap.size = size; pci_add_saved_cap(dev, save_state); return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index df4d69b8214..61ef8f2f9b1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -214,12 +214,17 @@ enum pci_bus_speed { PCI_SPEED_UNKNOWN = 0xff, }; -struct pci_cap_saved_state { - struct hlist_node next; +struct pci_cap_saved_data { char cap_nr; + unsigned int size; u32 data[0]; }; +struct pci_cap_saved_state { + struct hlist_node next; + struct pci_cap_saved_data cap; +}; + struct pcie_link_state; struct pci_vpd; struct pci_sriov; @@ -366,7 +371,7 @@ static inline struct pci_cap_saved_state *pci_find_saved_cap( struct hlist_node *pos; hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) { - if (tmp->cap_nr == cap) + if (tmp->cap.cap_nr == cap) return tmp; } return NULL; -- cgit v1.2.3-70-g09d2 From ffbdd3f7931fb7cb7e36d00d16303ec433be5145 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:27 -0600 Subject: PCI: Add interfaces to store and load the device saved state For KVM device assignment, we'd like to save off the state of a device prior to passing it to the guest and restore it later. We also want to allow pci_reset_funciton() to be called while the device is owned by the guest. This however overwrites and invalidates the struct pci_dev buffers, so we can't just manually call save and restore. Add generic interfaces for the saved state to be stored and reloaded back into struct pci_dev at a later time. Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 4 +++ 2 files changed, 102 insertions(+) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d6e5b8ea919..22c9b27fdd8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -976,6 +976,104 @@ void pci_restore_state(struct pci_dev *dev) dev->state_saved = false; } +struct pci_saved_state { + u32 config_space[16]; + struct pci_cap_saved_data cap[0]; +}; + +/** + * pci_store_saved_state - Allocate and return an opaque struct containing + * the device saved state. + * @dev: PCI device that we're dealing with + * + * Rerturn NULL if no state or error. + */ +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev) +{ + struct pci_saved_state *state; + struct pci_cap_saved_state *tmp; + struct pci_cap_saved_data *cap; + struct hlist_node *pos; + size_t size; + + if (!dev->state_saved) + return NULL; + + size = sizeof(*state) + sizeof(struct pci_cap_saved_data); + + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) + size += sizeof(struct pci_cap_saved_data) + tmp->cap.size; + + state = kzalloc(size, GFP_KERNEL); + if (!state) + return NULL; + + memcpy(state->config_space, dev->saved_config_space, + sizeof(state->config_space)); + + cap = state->cap; + hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) { + size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size; + memcpy(cap, &tmp->cap, len); + cap = (struct pci_cap_saved_data *)((u8 *)cap + len); + } + /* Empty cap_save terminates list */ + + return state; +} +EXPORT_SYMBOL_GPL(pci_store_saved_state); + +/** + * pci_load_saved_state - Reload the provided save state into struct pci_dev. + * @dev: PCI device that we're dealing with + * @state: Saved state returned from pci_store_saved_state() + */ +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state) +{ + struct pci_cap_saved_data *cap; + + dev->state_saved = false; + + if (!state) + return 0; + + memcpy(dev->saved_config_space, state->config_space, + sizeof(state->config_space)); + + cap = state->cap; + while (cap->size) { + struct pci_cap_saved_state *tmp; + + tmp = pci_find_saved_cap(dev, cap->cap_nr); + if (!tmp || tmp->cap.size != cap->size) + return -EINVAL; + + memcpy(tmp->cap.data, cap->data, tmp->cap.size); + cap = (struct pci_cap_saved_data *)((u8 *)cap + + sizeof(struct pci_cap_saved_data) + cap->size); + } + + dev->state_saved = true; + return 0; +} +EXPORT_SYMBOL_GPL(pci_load_saved_state); + +/** + * pci_load_and_free_saved_state - Reload the save state pointed to by state, + * and free the memory allocated for it. + * @dev: PCI device that we're dealing with + * @state: Pointer to saved state returned from pci_store_saved_state() + */ +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state) +{ + int ret = pci_load_saved_state(dev, *state); + kfree(*state); + *state = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state); + static int do_pci_enable_device(struct pci_dev *dev, int bars) { int err; diff --git a/include/linux/pci.h b/include/linux/pci.h index 61ef8f2f9b1..4604d1d5514 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -812,6 +812,10 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); void pci_restore_state(struct pci_dev *dev); +struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev); +int pci_load_saved_state(struct pci_dev *dev, struct pci_saved_state *state); +int pci_load_and_free_saved_state(struct pci_dev *dev, + struct pci_saved_state **state); int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); -- cgit v1.2.3-70-g09d2 From f8fcfd775523347afe460dc3a0f45d0479e784a2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 May 2011 10:02:39 -0600 Subject: KVM: Use pci_store/load_saved_state() around VM device usage Store the device saved state so that we can reload the device back to the original state when it's unassigned. This has the benefit that the state survives across pci_reset_function() calls via the PCI sysfs reset interface while the VM is using the device. Signed-off-by: Alex Williamson Acked-by: Avi Kivity Signed-off-by: Jesse Barnes --- include/linux/kvm_host.h | 1 + virt/kvm/assigned-dev.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ab428552af8..9272db03a3e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -513,6 +513,7 @@ struct kvm_assigned_dev_kernel { struct kvm *kvm; spinlock_t intx_lock; char irq_name[32]; + struct pci_saved_state *pci_saved_state; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index ae72ae604c8..6cc4b97ec45 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -197,8 +197,13 @@ static void kvm_free_assigned_device(struct kvm *kvm, { kvm_free_assigned_irq(kvm, assigned_dev); - __pci_reset_function(assigned_dev->dev); - pci_restore_state(assigned_dev->dev); + pci_reset_function(assigned_dev->dev); + if (pci_load_and_free_saved_state(assigned_dev->dev, + &assigned_dev->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&assigned_dev->dev->dev)); + else + pci_restore_state(assigned_dev->dev); pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); @@ -516,7 +521,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, pci_reset_function(dev); pci_save_state(dev); - + match->pci_saved_state = pci_store_saved_state(dev); + if (!match->pci_saved_state) + printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", + __func__, dev_name(&dev->dev)); match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; @@ -546,7 +554,9 @@ out: mutex_unlock(&kvm->lock); return r; out_list_del: - pci_restore_state(dev); + if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) + printk(KERN_INFO "%s: Couldn't reload %s saved state\n", + __func__, dev_name(&dev->dev)); list_del(&match->list); pci_release_regions(dev); out_disable: -- cgit v1.2.3-70-g09d2