From 09177e85d6a0bffac8b55afd28ed8b82bd873f0b Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Tue, 22 Jul 2008 10:07:33 -0700 Subject: I/OAT: Add watchdog/reset functionality to ioatdma Due to occasional DMA channel hangs observed for I/OAT versions 1.2 and 2.0, a watchdog has been introduced to check every 2 seconds whether all channels are progressing normally. If a stuck channel is detected, the driver resets it. The reset is done in two parts. The second part is scheduled by the first one to reinitialize the channel after the restart. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioatdma.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/dma/ioatdma.h') diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index f2c7fedbf00..c6ec933f989 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -28,7 +28,7 @@ #include #include -#define IOAT_DMA_VERSION "2.04" +#define IOAT_DMA_VERSION "2.18" enum ioat_interrupt { none = 0, @@ -40,6 +40,7 @@ enum ioat_interrupt { #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 +#define IOAT_WATCHDOG_PERIOD (2 * HZ) /** @@ -62,6 +63,7 @@ struct ioatdma_device { struct dma_device common; u8 version; enum ioat_interrupt irq_mode; + struct delayed_work work; struct msix_entry msix_entries[4]; struct ioat_dma_chan *idx[4]; }; @@ -75,6 +77,7 @@ struct ioat_dma_chan { dma_cookie_t completed_cookie; unsigned long last_completion; + unsigned long last_completion_time; size_t xfercap; /* XFERCAP register value expanded out */ @@ -82,6 +85,10 @@ struct ioat_dma_chan { spinlock_t desc_lock; struct list_head free_desc; struct list_head used_desc; + unsigned long watchdog_completion; + int watchdog_tcp_cookie; + u32 watchdog_last_tcp_cookie; + struct delayed_work work; int pending; int dmacount; @@ -98,6 +105,7 @@ struct ioat_dma_chan { u32 high; }; } *completion_virt; + unsigned long last_compl_desc_addr_hw; struct tasklet_struct cleanup_task; }; -- 
cgit v1.2.3-70-g09d2 From 16a37acaaf4aaa631ba3f83710ed6cdb1a597520 Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Tue, 22 Jul 2008 17:30:57 -0700 Subject: I/OAT: tcp_dma_copybreak default value dependent on I/OAT version I/OAT DMA performance tuning showed different optimal values of tcp_dma_copybreak for different I/OAT versions (4096 for 1.2 and 2048 for 2.0). This patch lets the ioatdma driver set the tcp_dma_copybreak value according to these results. [dan.j.williams@intel.com: remove some ifdefs] Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat_dma.c | 2 ++ drivers/dma/ioatdma.h | 15 +++++++++++++++ net/core/user_dma.c | 1 + 3 files changed, 18 insertions(+) (limited to 'drivers/dma/ioatdma.h') diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index da572968a7d..ece5a0e3a33 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -1581,6 +1581,8 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, if (err) goto err_self_test; + ioat_set_tcp_copy_break(device); + dma_async_device_register(&device->common); INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index c6ec933f989..685adb62aa5 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -27,6 +27,7 @@ #include #include #include +#include #define IOAT_DMA_VERSION "2.18" @@ -129,6 +130,20 @@ struct ioat_desc_sw { struct dma_async_tx_descriptor async_tx; }; +static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) +{ + #ifdef CONFIG_NET_DMA + switch (dev->version) { + case IOAT_VER_1_2: + sysctl_tcp_dma_copybreak = 4096; + break; + case IOAT_VER_2_0: + sysctl_tcp_dma_copybreak = 2048; + break; + } + #endif +} + #if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase); diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 0ad1cd57bc3..de760504f6f 100644 
--- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -34,6 +34,7 @@ #define NET_DMA_DEFAULT_COPYBREAK 4096 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak); /** * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. -- cgit v1.2.3-70-g09d2 From 7f1b358a236ee9c19657a619ac6f2dcabcaa0924 Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Tue, 22 Jul 2008 17:30:57 -0700 Subject: I/OAT: I/OAT version 3.0 support This patch adds to ioatdma and dca modules support for Intel I/OAT DMA engine ver.3 (aka CB3 device). The main features of I/OAT ver.3 are: * 8 single channel DMA devices (8 channels total) * 8 DCA providers, each can accept 2 requesters * 8-bit TAG values and 32-bit extended APIC IDs Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dca/dca-core.c | 131 ++++++++++++++++----- drivers/dca/dca-sysfs.c | 3 +- drivers/dma/ioat.c | 15 +++ drivers/dma/ioat_dca.c | 244 ++++++++++++++++++++++++++++++++++++++-- drivers/dma/ioat_dma.c | 96 ++++++++++++++-- drivers/dma/ioatdma.h | 5 +- drivers/dma/ioatdma_hw.h | 1 + drivers/dma/ioatdma_registers.h | 20 ++++ include/linux/dca.h | 7 +- include/linux/pci_ids.h | 8 ++ 10 files changed, 481 insertions(+), 49 deletions(-) (limited to 'drivers/dma/ioatdma.h') diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index bf5b92f86df..ec249d2db63 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -28,13 +28,29 @@ #include #include -MODULE_LICENSE("GPL"); +#define DCA_VERSION "1.4" -/* For now we're assuming a single, global, DCA provider for the system. 
*/ +MODULE_VERSION(DCA_VERSION); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); static DEFINE_SPINLOCK(dca_lock); -static struct dca_provider *global_dca = NULL; +static LIST_HEAD(dca_providers); + +static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +{ + struct dca_provider *dca, *ret = NULL; + + list_for_each_entry(dca, &dca_providers, node) { + if ((!dev) || (dca->ops->dev_managed(dca, dev))) { + ret = dca; + break; + } + } + + return ret; +} /** * dca_add_requester - add a dca client to the list @@ -42,25 +58,39 @@ static struct dca_provider *global_dca = NULL; */ int dca_add_requester(struct device *dev) { - int err, slot; + struct dca_provider *dca; + int err, slot = -ENODEV; - if (!global_dca) - return -ENODEV; + if (!dev) + return -EFAULT; spin_lock(&dca_lock); - slot = global_dca->ops->add_requester(global_dca, dev); - spin_unlock(&dca_lock); - if (slot < 0) + + /* check if the requester has not been added already */ + dca = dca_find_provider_by_dev(dev); + if (dca) { + spin_unlock(&dca_lock); + return -EEXIST; + } + + list_for_each_entry(dca, &dca_providers, node) { + slot = dca->ops->add_requester(dca, dev); + if (slot >= 0) + break; + } + if (slot < 0) { + spin_unlock(&dca_lock); return slot; + } - err = dca_sysfs_add_req(global_dca, dev, slot); + err = dca_sysfs_add_req(dca, dev, slot); if (err) { - spin_lock(&dca_lock); - global_dca->ops->remove_requester(global_dca, dev); + dca->ops->remove_requester(dca, dev); spin_unlock(&dca_lock); return err; } + spin_unlock(&dca_lock); return 0; } EXPORT_SYMBOL_GPL(dca_add_requester); @@ -71,30 +101,78 @@ EXPORT_SYMBOL_GPL(dca_add_requester); */ int dca_remove_requester(struct device *dev) { + struct dca_provider *dca; int slot; - if (!global_dca) - return -ENODEV; + + if (!dev) + return -EFAULT; spin_lock(&dca_lock); - slot = global_dca->ops->remove_requester(global_dca, dev); - spin_unlock(&dca_lock); - if (slot < 0) + dca = dca_find_provider_by_dev(dev); + if (!dca) { + 
spin_unlock(&dca_lock); + return -ENODEV; + } + slot = dca->ops->remove_requester(dca, dev); + if (slot < 0) { + spin_unlock(&dca_lock); return slot; + } - dca_sysfs_remove_req(global_dca, slot); + dca_sysfs_remove_req(dca, slot); + + spin_unlock(&dca_lock); return 0; } EXPORT_SYMBOL_GPL(dca_remove_requester); /** - * dca_get_tag - return the dca tag for the given cpu + * dca_common_get_tag - return the dca tag (serves both new and old api) + * @dev - the device that wants dca service * @cpu - the cpuid as returned by get_cpu() */ -u8 dca_get_tag(int cpu) +u8 dca_common_get_tag(struct device *dev, int cpu) { - if (!global_dca) + struct dca_provider *dca; + u8 tag; + + spin_lock(&dca_lock); + + dca = dca_find_provider_by_dev(dev); + if (!dca) { + spin_unlock(&dca_lock); return -ENODEV; - return global_dca->ops->get_tag(global_dca, cpu); + } + tag = dca->ops->get_tag(dca, dev, cpu); + + spin_unlock(&dca_lock); + return tag; +} + +/** + * dca3_get_tag - return the dca tag to the requester device + * for the given cpu (new api) + * @dev - the device that wants dca service + * @cpu - the cpuid as returned by get_cpu() + */ +u8 dca3_get_tag(struct device *dev, int cpu) +{ + if (!dev) + return -EFAULT; + + return dca_common_get_tag(dev, cpu); +} +EXPORT_SYMBOL_GPL(dca3_get_tag); + +/** + * dca_get_tag - return the dca tag for the given cpu (old api) + * @cpu - the cpuid as returned by get_cpu() + */ +u8 dca_get_tag(int cpu) +{ + struct device *dev = NULL; + + return dca_common_get_tag(dev, cpu); } EXPORT_SYMBOL_GPL(dca_get_tag); @@ -140,12 +218,10 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) { int err; - if (global_dca) - return -EEXIST; err = dca_sysfs_add_provider(dca, dev); if (err) return err; - global_dca = dca; + list_add(&dca->node, &dca_providers); blocking_notifier_call_chain(&dca_provider_chain, DCA_PROVIDER_ADD, NULL); return 0; @@ -158,11 +234,9 @@ EXPORT_SYMBOL_GPL(register_dca_provider); */ void unregister_dca_provider(struct 
dca_provider *dca) { - if (!global_dca) - return; blocking_notifier_call_chain(&dca_provider_chain, DCA_PROVIDER_REMOVE, NULL); - global_dca = NULL; + list_del(&dca->node); dca_sysfs_remove_provider(dca); } EXPORT_SYMBOL_GPL(unregister_dca_provider); @@ -187,6 +261,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify); static int __init dca_init(void) { + printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION); return dca_sysfs_init(); } diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c index 011328faa5f..3d47e9d8e34 100644 --- a/drivers/dca/dca-sysfs.c +++ b/drivers/dca/dca-sysfs.c @@ -13,9 +13,10 @@ static spinlock_t dca_idr_lock; int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot) { struct device *cd; + static int req_count; cd = device_create(dca_class, dca->cd, MKDEV(0, slot + 1), - "requester%d", slot); + "requester%d", req_count++); if (IS_ERR(cd)) return PTR_ERR(cd); return 0; diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c index 16e0fd8facf..9b16a3af9a0 100644 --- a/drivers/dma/ioat.c +++ b/drivers/dma/ioat.c @@ -47,6 +47,16 @@ static struct pci_device_id ioat_pci_tbl[] = { /* I/OAT v2 platforms */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, { 0, } }; @@ -83,6 +93,11 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase) if (device->dma && ioat_dca_enabled) device->dca = 
ioat2_dca_init(pdev, iobase); break; + case IOAT_VER_3_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat3_dca_init(pdev, iobase); + break; default: err = -ENODEV; break; diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c index 9e922760b7f..6cf622da028 100644 --- a/drivers/dma/ioat_dca.c +++ b/drivers/dma/ioat_dca.c @@ -37,12 +37,18 @@ #include "ioatdma_registers.h" /* - * Bit 16 of a tag map entry is the "valid" bit, if it is set then bits 0:15 + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 * contain the bit number of the APIC ID to map into the DCA tag. If the valid * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. */ #define DCA_TAG_MAP_VALID 0x80 +#define DCA3_TAG_MAP_BIT_TO_INV 0x80 +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 +#define DCA3_TAG_MAP_LITERAL_VAL 0x1 + +#define DCA_TAG_MAP_MASK 0xDF + /* * "Legacy" DCA systems do not implement the DCA register set in the * I/OAT device. Software needs direct support for their tag mappings. 
@@ -95,6 +101,7 @@ struct ioat_dca_slot { }; #define IOAT_DCA_MAX_REQ 6 +#define IOAT3_DCA_MAX_REQ 2 struct ioat_dca_priv { void __iomem *iobase; @@ -171,7 +178,9 @@ static int ioat_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu) +static u8 ioat_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) { struct ioat_dca_priv *ioatdca = dca_priv(dca); int i, apic_id, bit, value; @@ -193,10 +202,26 @@ static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu) return tag; } +static int ioat_dca_dev_managed(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + pdev = to_pci_dev(dev); + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) + return 1; + } + return 0; +} + static struct dca_ops ioat_dca_ops = { .add_requester = ioat_dca_add_requester, .remove_requester = ioat_dca_remove_requester, .get_tag = ioat_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, }; @@ -207,6 +232,8 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) u8 *tag_map = NULL; int i; int err; + u8 version; + u8 max_requesters; if (!system_has_dca_enabled(pdev)) return NULL; @@ -237,15 +264,20 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) if (tag_map == NULL) return NULL; + version = readb(iobase + IOAT_VER_OFFSET); + if (version == IOAT_VER_3_0) + max_requesters = IOAT3_DCA_MAX_REQ; + else + max_requesters = IOAT_DCA_MAX_REQ; + dca = alloc_dca_provider(&ioat_dca_ops, sizeof(*ioatdca) + - (sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ)); + (sizeof(struct ioat_dca_slot) * max_requesters)); if (!dca) return NULL; ioatdca = dca_priv(dca); - ioatdca->max_requesters = IOAT_DCA_MAX_REQ; - + ioatdca->max_requesters = max_requesters; ioatdca->dca_base = iobase + 0x54; /* copy over the APIC ID to DCA tag mapping */ @@ -323,11 
+355,13 @@ static int ioat2_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu) +static u8 ioat2_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) { u8 tag; - tag = ioat_dca_get_tag(dca, cpu); + tag = ioat_dca_get_tag(dca, dev, cpu); tag = (~tag) & 0x1F; return tag; } @@ -336,6 +370,7 @@ static struct dca_ops ioat2_dca_ops = { .add_requester = ioat2_dca_add_requester, .remove_requester = ioat2_dca_remove_requester, .get_tag = ioat2_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, }; static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) @@ -425,3 +460,198 @@ struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) return dca; } + +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat3_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + 
return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat3_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { + bit = entry & + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); + value = (apic_id & (1 << bit)) ? 1 : 0; + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; + value = (apic_id & (1 << bit)) ? 0 : 1; + } else { + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 
1 : 0; + } + tag |= (value << i); + } + + return tag; +} + +static struct dca_ops ioat3_dca_ops = { + .add_requester = ioat3_dca_add_requester, + .remove_requester = ioat3_dca_remove_requester, + .get_tag = ioat3_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } tag_map; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat3_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { + pcie_control |= IOAT3_PCI_CONTROL_MEMWR; + 
writew(pcie_control, + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map.low = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); + tag_map.high = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); + for (i = 0; i < 8; i++) { + bit = tag_map.full >> (8 * i); + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index ece5a0e3a33..a52156e5688 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -53,6 +53,12 @@ MODULE_PARM_DESC(ioat_pending_level, static void ioat_dma_chan_reset_part2(struct work_struct *work); static void ioat_dma_chan_watchdog(struct work_struct *work); +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + /* internal functions */ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); @@ -129,6 +135,38 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) int i; struct ioat_dma_chan *ioat_chan; + /* + * IOAT ver.3 workarounds + */ + if (device->version == IOAT_VER_3_0) { + u32 chan_err_mask; + u16 dev_id; + u32 dmauncerrsts; + + /* + * Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + chan_err_mask = 0x3E07; + pci_write_config_dword(device->pdev, + IOAT_PCI_CHANERRMASK_INT_OFFSET, + chan_err_mask); + + /* + * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(device->pdev, + IOAT_PCI_DEVICE_ID_OFFSET, + &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + 
dmauncerrsts = 0x10; + pci_write_config_dword(device->pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + dmauncerrsts); + } + } + device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); @@ -473,6 +511,13 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) prev = new; } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; if (new->async_tx.callback) { hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; @@ -558,7 +603,14 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) desc_count++; } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; if (new->async_tx.callback) { hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; if (first != new) { @@ -629,6 +681,7 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( desc_sw->async_tx.tx_submit = ioat1_tx_submit; break; case IOAT_VER_2_0: + case IOAT_VER_3_0: desc_sw->async_tx.tx_submit = ioat2_tx_submit; break; } @@ -779,6 +832,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) } break; case IOAT_VER_2_0: + case IOAT_VER_3_0: list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { list_del(&desc->node); @@ -868,7 +922,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) /* set up the noop descriptor */ noop_desc = to_ioat_desc(ioat_chan->used_desc.next); - noop_desc->hw->size = 0; + /* set size to non-zero value (channel returns error when size is 0) */ + noop_desc->hw->size = 
NULL_DESC_BUFFER_SIZE; noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; noop_desc->hw->src_addr = 0; noop_desc->hw->dst_addr = 0; @@ -918,6 +973,7 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor( return ioat1_dma_get_next_descriptor(ioat_chan); break; case IOAT_VER_2_0: + case IOAT_VER_3_0: return ioat2_dma_get_next_descriptor(ioat_chan); break; } @@ -1061,10 +1117,12 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) * perhaps we're stuck so hard that the watchdog can't go off? * try to catch it after 2 seconds */ - if (time_after(jiffies, - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); - ioat_chan->last_completion_time = jiffies; + if (ioat_chan->device->version != IOAT_VER_3_0) { + if (time_after(jiffies, + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); + ioat_chan->last_completion_time = jiffies; + } } return; } @@ -1120,6 +1178,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) } break; case IOAT_VER_2_0: + case IOAT_VER_3_0: /* has some other thread has already cleaned up? 
*/ if (ioat_chan->used_desc.prev == NULL) break; @@ -1223,10 +1282,19 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) spin_lock_bh(&ioat_chan->desc_lock); desc = ioat_dma_get_next_descriptor(ioat_chan); + + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "Unable to start null desc - get next desc failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return; + } + desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL | IOAT_DMA_DESCRIPTOR_CTL_INT_GN | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - desc->hw->size = 0; + /* set size to non-zero value (channel returns error when size is 0) */ + desc->hw->size = NULL_DESC_BUFFER_SIZE; desc->hw->src_addr = 0; desc->hw->dst_addr = 0; async_tx_ack(&desc->async_tx); @@ -1244,6 +1312,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); break; case IOAT_VER_2_0: + case IOAT_VER_3_0: writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); writel(((u64) desc->async_tx.phys) >> 32, @@ -1562,6 +1631,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, ioat1_dma_memcpy_issue_pending; break; case IOAT_VER_2_0: + case IOAT_VER_3_0: device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; device->common.device_issue_pending = ioat2_dma_memcpy_issue_pending; @@ -1585,9 +1655,11 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, dma_async_device_register(&device->common); - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, - WATCHDOG_DELAY); + if (device->version != IOAT_VER_3_0) { + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, + WATCHDOG_DELAY); + } return device; @@ -1621,7 +1693,9 @@ void ioat_dma_remove(struct ioatdma_device *device) pci_release_regions(device->pdev); pci_disable_device(device->pdev); - cancel_delayed_work(&device->work); + if (device->version != 
IOAT_VER_3_0) { + cancel_delayed_work(&device->work); + } list_for_each_entry_safe(chan, _chan, &device->common.channels, device_node) { diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index 685adb62aa5..a3306d0e137 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -29,7 +29,7 @@ #include #include -#define IOAT_DMA_VERSION "2.18" +#define IOAT_DMA_VERSION "3.30" enum ioat_interrupt { none = 0, @@ -135,6 +135,7 @@ static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) #ifdef CONFIG_NET_DMA switch (dev->version) { case IOAT_VER_1_2: + case IOAT_VER_3_0: sysctl_tcp_dma_copybreak = 4096; break; case IOAT_VER_2_0: @@ -150,11 +151,13 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void ioat_dma_remove(struct ioatdma_device *device); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); #else #define ioat_dma_probe(pdev, iobase) NULL #define ioat_dma_remove(device) do { } while (0) #define ioat_dca_init(pdev, iobase) NULL #define ioat2_dca_init(pdev, iobase) NULL +#define ioat3_dca_init(pdev, iobase) NULL #endif #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h index dd470fa91d8..f1ae2c776f7 100644 --- a/drivers/dma/ioatdma_hw.h +++ b/drivers/dma/ioatdma_hw.h @@ -35,6 +35,7 @@ #define IOAT_PCI_SID 0x8086 #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ struct ioat_dma_descriptor { uint32_t size; diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h index 9832d7ebd93..827cb503cac 100644 --- a/drivers/dma/ioatdma_registers.h +++ b/drivers/dma/ioatdma_registers.h @@ -25,6 +25,10 @@ #define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 #define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 +#define 
IOAT_PCI_DEVICE_ID_OFFSET 0x02 +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 + /* MMIO Device Registers */ #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ @@ -149,7 +153,23 @@ #define IOAT_DCA_GREQID_VALID 0x20000000 #define IOAT_DCA_GREQID_LASTID 0x80000000 +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 + +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT3_CSI_CONTROL_OFFSET 0x0C +#define IOAT3_CSI_CONTROL_PREFETCH 0x1 + +#define IOAT3_PCI_CONTROL_OFFSET 0x0E +#define IOAT3_PCI_CONTROL_MEMWR 0x1 + +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 +#define IOAT3_DCA_GREQID_OFFSET 0x02 #define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ #define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ diff --git a/include/linux/dca.h b/include/linux/dca.h index af61cd1f37e..b00a753eda5 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h @@ -10,6 +10,7 @@ void dca_unregister_notify(struct notifier_block *nb); #define DCA_PROVIDER_REMOVE 0x0002 struct dca_provider { + struct list_head node; struct dca_ops *ops; struct device *cd; int id; @@ -18,7 +19,9 @@ struct dca_provider { struct dca_ops { int (*add_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *); - u8 (*get_tag) (struct dca_provider *, int cpu); + u8 (*get_tag) (struct dca_provider *, struct device *, + int cpu); + int (*dev_managed) (struct dca_provider *, struct device *); }; struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); @@ -32,9 +35,11 @@ static inline void *dca_priv(struct dca_provider *dca) } /* Requester API */ +#define DCA_GET_TAG_TWO_ARGS int dca_add_requester(struct device *dev); int dca_remove_requester(struct device *dev); u8 dca_get_tag(int 
cpu); +u8 dca3_get_tag(struct device *dev, int cpu); /* internal stuff */ int __init dca_sysfs_init(void); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9b940e64417..06a5b7ae79b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2363,6 +2363,14 @@ #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a +#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b +#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c +#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 -- cgit v1.2.3-70-g09d2