From 584ec22759c06cdfc189c03a727f20038526245b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:32:12 -0700 Subject: ioat: move to drivers/dma/ioat/ When first created the ioat driver was the only inhabitant of drivers/dma/. Now, it is the only multi-file (more than a .c and a .h) driver in the directory. Moving it to an ioat/ subdirectory allows the naming convention to be cleaned up, and allows for future splitting of the source files by hardware version (v1, v2, and v3). Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 drivers/dma/ioat/pci.c (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c new file mode 100644 index 00000000000..d7948bfd8fb --- /dev/null +++ b/drivers/dma/ioat/pci.c @@ -0,0 +1,202 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include +#include +#include +#include +#include +#include "dma.h" +#include "registers.h" +#include "hw.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + { 0, } +}; + +struct ioat_device { + struct pci_dev *pdev; + void __iomem *iobase; + struct ioatdma_device *dma; + struct dca_provider *dca; +}; + +static int __devinit ioat_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void __devexit ioat_remove(struct pci_dev *pdev); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); + +static struct pci_driver ioat_pci_driver = { + .name = "ioatdma", + .id_table = ioat_pci_tbl, + .probe = ioat_probe, + .remove = __devexit_p(ioat_remove), +}; + +static int __devinit ioat_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + void __iomem *iobase; + struct ioat_device *device; + unsigned long mmio_start, mmio_len; + int err; + + err = pci_enable_device(pdev); + if (err) + goto err_enable_device; + + err = pci_request_regions(pdev, ioat_pci_driver.name); + if (err) + goto err_request_regions; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + goto err_set_dma_mask; + + mmio_start = pci_resource_start(pdev, 0); + mmio_len = pci_resource_len(pdev, 0); + iobase = ioremap(mmio_start, mmio_len); + if (!iobase) { + err = -ENOMEM; + goto err_ioremap; + } + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + err = -ENOMEM; + goto err_kzalloc; + } + device->pdev = pdev; + pci_set_drvdata(pdev, device); + device->iobase = iobase; + + pci_set_master(pdev); + + switch (readb(iobase + IOAT_VER_OFFSET)) { + case IOAT_VER_1_2: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat_dca_init(pdev, iobase); + break; + case IOAT_VER_2_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat2_dca_init(pdev, iobase); + break; + case IOAT_VER_3_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat3_dca_init(pdev, iobase); + break; + default: + err = -ENODEV; + break; + } + if (!device->dma) + err = -ENODEV; + + if (err) + goto err_version; + + return 0; + +err_version: + kfree(device); +err_kzalloc: + iounmap(iobase); +err_ioremap: +err_set_dma_mask: + pci_release_regions(pdev); + pci_disable_device(pdev); +err_request_regions: +err_enable_device: + return err; +} + +static void __devexit ioat_remove(struct pci_dev *pdev) +{ + struct ioat_device *device = pci_get_drvdata(pdev); + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca); + free_dca_provider(device->dca); + device->dca = NULL; + } + + if (device->dma) { + ioat_dma_remove(device->dma); + device->dma = NULL; + } + + kfree(device); +} + +static int __init ioat_init_module(void) +{ + return pci_register_driver(&ioat_pci_driver); +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); +} +module_exit(ioat_exit_module); -- cgit v1.2.3-70-g09d2 From e6c0b69a43150c1a37cf342ce5faedf12583bf79 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:29:44 -0700 Subject: ioat: convert ioat_probe to pcim/devm The driver currently duplicates much of what these routines offer, so just use the common code. For example ->irq_mode tracks what interrupt mode was initialized, which duplicates the ->msix_enabled and ->msi_enabled handling in pcim_release. This also adds a check to the return value of dma_async_device_register, which can fail. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 130 ++++++++++++++++--------------------------------- drivers/dma/ioat/dma.h | 11 ----- drivers/dma/ioat/hw.h | 1 + drivers/dma/ioat/pci.c | 67 +++++++++---------------- 4 files changed, 68 insertions(+), 141 deletions(-) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 16c080786a6..65f8b7492a4 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -121,6 +121,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) u32 xfercap; int i; struct ioat_dma_chan *ioat_chan; + struct device *dev = &device->pdev->dev; /* * IOAT ver.3 workarounds @@ -164,7 +165,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) } #endif for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); + ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); if (!ioat_chan) { device->common.chancnt = i; break; @@ -1450,7 +1451,11 @@ MODULE_PARM_DESC(ioat_interrupt_style, static int ioat_dma_setup_interrupts(struct ioatdma_device *device) { struct ioat_dma_chan *ioat_chan; - int err, i, j, msixcnt; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; u8 intrctrl = 0; if (!strcmp(ioat_interrupt_style, "msix")) @@ -1461,8 +1466,7 @@ static int ioat_dma_setup_interrupts(struct ioatdma_device *device) goto msi; if (!strcmp(ioat_interrupt_style, "intx")) goto intx; - dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", - ioat_interrupt_style); + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); goto err_no_irq; msix: @@ -1471,55 +1475,55 @@ msix: for (i = 0; i < msixcnt; i++) device->msix_entries[i].entry = i; - err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); + err = pci_enable_msix(pdev, device->msix_entries, msixcnt); if (err < 0) goto msi; if (err > 0) goto msix_single_vector; for (i = 0; i < msixcnt; i++) { + msix = &device->msix_entries[i]; ioat_chan = ioat_lookup_chan_by_index(device, i); - err = request_irq(device->msix_entries[i].vector, - ioat_dma_do_interrupt_msix, - 0, "ioat-msix", ioat_chan); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", ioat_chan); if (err) { for (j = 0; j < i; j++) { + msix = &device->msix_entries[j]; ioat_chan = ioat_lookup_chan_by_index(device, j); - free_irq(device->msix_entries[j].vector, - ioat_chan); + devm_free_irq(dev, msix->vector, ioat_chan); } goto msix_single_vector; } } intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = msix_multi_vector; goto done; msix_single_vector: - device->msix_entries[0].entry = 0; - err = pci_enable_msix(device->pdev, device->msix_entries, 1); + msix = &device->msix_entries[0]; + msix->entry = 0; + err = pci_enable_msix(pdev, device->msix_entries, 1); if (err) goto msi; - err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, - 0, "ioat-msix", device); + err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, + "ioat-msix", device); if (err) { - pci_disable_msix(device->pdev); + pci_disable_msix(pdev); goto msi; } - device->irq_mode = msix_single_vector; goto done; msi: - err = pci_enable_msi(device->pdev); + err = pci_enable_msi(pdev); if (err) goto intx; - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - 0, "ioat-msi", device); + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", device); if (err) { - pci_disable_msi(device->pdev); + pci_disable_msi(pdev); goto intx; } /* @@ -1527,21 +1531,17 @@ msi: */ if (device->version == IOAT_VER_1_2) { u32 dmactrl; - pci_read_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, dmactrl); + pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); } - device->irq_mode = msi; goto done; intx: - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", device); if (err) goto err_no_irq; - device->irq_mode = intx; done: intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; @@ -1551,60 +1551,26 @@ done: err_no_irq: /* Disable all interrupt generation */ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - dev_err(&device->pdev->dev, "no usable interrupts\n"); - device->irq_mode = none; - return -1; + dev_err(dev, "no usable interrupts\n"); + return err; } -/** - * ioat_dma_remove_interrupts - remove whatever interrupts were set - * @device: ioat device - */ -static void ioat_dma_remove_interrupts(struct ioatdma_device *device) +static void ioat_disable_interrupts(struct ioatdma_device *device) { - struct ioat_dma_chan *ioat_chan; - int i; - /* Disable all interrupt generation */ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - - switch (device->irq_mode) { - case msix_multi_vector: - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - free_irq(device->msix_entries[i].vector, ioat_chan); - } - pci_disable_msix(device->pdev); - break; - case msix_single_vector: - free_irq(device->msix_entries[0].vector, device); - pci_disable_msix(device->pdev); - break; - case msi: - free_irq(device->pdev->irq, device); - pci_disable_msi(device->pdev); - break; - case intx: - free_irq(device->pdev->irq, device); - break; - case none: - dev_warn(&device->pdev->dev, - "call to %s without interrupts setup\n", __func__); - } - device->irq_mode = none; } struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) { int err; + struct device *dev = &pdev->dev; struct ioatdma_device *device; - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { + device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); + if (!device) err = -ENOMEM; - goto err_kzalloc; - } device->pdev = pdev; device->reg_base = iobase; device->version = readb(device->reg_base + IOAT_VER_OFFSET); @@ -1651,14 +1617,12 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, break; } - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine found," + dev_err(dev, "Intel(R) I/OAT DMA Engine found," " %d channels, device version 0x%02x, driver version %s\n", device->common.chancnt, device->version, IOAT_DMA_VERSION); if (!device->common.chancnt) { - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine problem found: " + dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: " "zero channels detected\n"); goto err_setup_interrupts; } @@ -1671,9 +1635,11 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, if (err) goto err_self_test; - ioat_set_tcp_copy_break(device); + err = dma_async_device_register(&device->common); + if (err) + goto err_self_test; - dma_async_device_register(&device->common); + ioat_set_tcp_copy_break(device); if (device->version != IOAT_VER_3_0) { INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); @@ -1684,16 +1650,12 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, return device; err_self_test: - ioat_dma_remove_interrupts(device); + ioat_disable_interrupts(device); err_setup_interrupts: pci_pool_destroy(device->completion_pool); err_completion_pool: pci_pool_destroy(device->dma_pool); err_dma_pool: - kfree(device); -err_kzalloc: - dev_err(&pdev->dev, - "Intel(R) I/OAT DMA Engine initialization failed\n"); return NULL; } @@ -1705,23 +1667,17 @@ void ioat_dma_remove(struct ioatdma_device *device) if (device->version != IOAT_VER_3_0) cancel_delayed_work(&device->work); - ioat_dma_remove_interrupts(device); + ioat_disable_interrupts(device); dma_async_device_unregister(&device->common); pci_pool_destroy(device->dma_pool); pci_pool_destroy(device->completion_pool); - iounmap(device->reg_base); - pci_release_regions(device->pdev); - pci_disable_device(device->pdev); - list_for_each_entry_safe(chan, _chan, &device->common.channels, device_node) { ioat_chan = to_ioat_chan(chan); list_del(&chan->device_node); - kfree(ioat_chan); } - kfree(device); } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index ccb400f5e27..5e8d7cfabc2 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -31,14 +31,6 @@ #define IOAT_DMA_VERSION "3.64" -enum ioat_interrupt { - none = 0, - msix_multi_vector = 1, - msix_single_vector = 2, - msi = 3, - intx = 4, -}; - #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 #define IOAT_WATCHDOG_PERIOD (2 * HZ) @@ -59,7 +51,6 @@ enum ioat_interrupt { */ #define NULL_DESC_BUFFER_SIZE 1 - /** * struct ioatdma_device - internal representation of a IOAT device * @pdev: PCI-Express device @@ -67,7 +58,6 @@ enum ioat_interrupt { * @dma_pool: for allocating DMA descriptors * @common: embedded struct dma_device * @version: version of ioatdma device - * @irq_mode: which style irq to use * @msix_entries: irq handlers * @idx: per channel data */ @@ -79,7 +69,6 @@ struct ioatdma_device { struct pci_pool *completion_pool; struct dma_device common; u8 version; - enum ioat_interrupt irq_mode; struct delayed_work work; struct msix_entry msix_entries[4]; struct ioat_dma_chan *idx[4]; diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index afa57eef86c..1438fa5c4d1 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h @@ -23,6 +23,7 @@ /* PCI Configuration Space Values */ #define IOAT_PCI_VID 0x8086 +#define IOAT_MMIO_BAR 0 /* CB device ID's */ #define IOAT_PCI_DID_5000 0x1A38 diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index d7948bfd8fb..982e38fd177 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -62,7 +62,6 @@ static struct pci_device_id ioat_pci_tbl[] = { struct ioat_device { struct pci_dev *pdev; - void __iomem *iobase; struct ioatdma_device *dma; struct dca_provider *dca; }; @@ -75,8 +74,10 @@ static int ioat_dca_enabled = 1; module_param(ioat_dca_enabled, int, 0644); MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +#define DRV_NAME "ioatdma" + static struct pci_driver ioat_pci_driver = { - .name = "ioatdma", + .name = DRV_NAME, .id_table = ioat_pci_tbl, .probe = ioat_probe, .remove = __devexit_p(ioat_remove), @@ -85,47 +86,42 @@ static struct pci_driver ioat_pci_driver = { static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + void __iomem * const *iomap; void __iomem *iobase; + struct device *dev = &pdev->dev; struct ioat_device *device; - unsigned long mmio_start, mmio_len; int err; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) - goto err_enable_device; + return err; - err = pci_request_regions(pdev, ioat_pci_driver.name); + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); if (err) - goto err_request_regions; + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) - goto err_set_dma_mask; + return err; err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) - goto err_set_dma_mask; - - mmio_start = pci_resource_start(pdev, 0); - mmio_len = pci_resource_len(pdev, 0); - iobase = ioremap(mmio_start, mmio_len); - if (!iobase) { - err = -ENOMEM; - goto err_ioremap; - } + return err; + + device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } device->pdev = pdev; pci_set_drvdata(pdev, device); - device->iobase = iobase; + iobase = iomap[IOAT_MMIO_BAR]; pci_set_master(pdev); @@ -146,28 +142,15 @@ static int __devinit ioat_probe(struct pci_dev *pdev, device->dca = ioat3_dca_init(pdev, iobase); break; default: - err = -ENODEV; - break; + return -ENODEV; } - if (!device->dma) - err = -ENODEV; - if (err) - goto err_version; + if (!device->dma) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } return 0; - -err_version: - kfree(device); -err_kzalloc: - iounmap(iobase); -err_ioremap: -err_set_dma_mask: - pci_release_regions(pdev); - pci_disable_device(pdev); -err_request_regions: -err_enable_device: - return err; } static void __devexit ioat_remove(struct pci_dev *pdev) @@ -185,8 +168,6 @@ static void __devexit ioat_remove(struct pci_dev *pdev) ioat_dma_remove(device->dma); device->dma = NULL; } - - kfree(device); } static int __init ioat_init_module(void) -- cgit v1.2.3-70-g09d2 From f2427e276ffec5ce599c6bc116e0927269a360ef Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Jul 2009 14:42:38 -0700 Subject: ioat: split ioat_dma_probe into core/version-specific routines Towards the removal of ioatdma_device.version split the initialization path into distinct versions. This conversion: 1/ moves version specific probe code to version specific routines 2/ removes the need for ioat_device 3/ turns off the ioat1 msi quirk if the device is reinitialized for intx Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 253 ++++++++++++++++++++++++++++++------------------- drivers/dma/ioat/dma.h | 23 ++--- drivers/dma/ioat/pci.c | 79 ++++++++------- 3 files changed, 200 insertions(+), 155 deletions(-) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 462dae62719..b7508041c6d 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -121,52 +121,21 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) int i; struct ioat_dma_chan *ioat_chan; struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; - /* - * IOAT ver.3 workarounds - */ - if (device->version == IOAT_VER_3_0) { - u32 chan_err_mask; - u16 dev_id; - u32 dmauncerrsts; - - /* - * Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - chan_err_mask = 0x3E07; - pci_write_config_dword(device->pdev, - IOAT_PCI_CHANERRMASK_INT_OFFSET, - chan_err_mask); - - /* - * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(device->pdev, - IOAT_PCI_DEVICE_ID_OFFSET, - &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - dmauncerrsts = 0x10; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - dmauncerrsts); - } - } - - device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { - device->common.chancnt--; - } + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; #endif - for (i = 0; i < device->common.chancnt; i++) { + for (i = 0; i < dma->chancnt; i++) { ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); if (!ioat_chan) { - device->common.chancnt = i; + dma->chancnt = i; break; } @@ -175,28 +144,20 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) ioat_chan->xfercap = xfercap; ioat_chan->desccount = 0; INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); - if (ioat_chan->device->version == IOAT_VER_2_0) - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | - IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - else if (ioat_chan->device->version == IOAT_VER_3_0) - writel(IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); spin_lock_init(&ioat_chan->cleanup_lock); spin_lock_init(&ioat_chan->desc_lock); INIT_LIST_HEAD(&ioat_chan->free_desc); INIT_LIST_HEAD(&ioat_chan->used_desc); /* This should be made common somewhere in dmaengine.c */ ioat_chan->common.device = &device->common; - list_add_tail(&ioat_chan->common.device_node, - &device->common.channels); + list_add_tail(&ioat_chan->common.device_node, &dma->channels); device->idx[i] = ioat_chan; tasklet_init(&ioat_chan->cleanup_task, ioat_dma_cleanup_tasklet, (unsigned long) ioat_chan); tasklet_disable(&ioat_chan->cleanup_task); } - return device->common.chancnt; + return dma->chancnt; } /** @@ -1504,15 +1465,6 @@ msi: pci_disable_msi(pdev); goto intx; } - /* - * CB 1.2 devices need a bit set in configuration space to enable MSI - */ - if (device->version == IOAT_VER_1_2) { - u32 dmactrl; - pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); - } goto done; intx: @@ -1522,6 +1474,8 @@ intx: goto err_no_irq; done: + if (device->intr_quirk) + device->intr_quirk(device); intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); return 0; @@ -1539,21 +1493,12 @@ static void ioat_disable_interrupts(struct ioatdma_device *device) writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); } -struct ioatdma_device * -ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) +static int ioat_probe(struct ioatdma_device *device) { - int err; + int err = -ENODEV; + struct dma_device *dma = &device->common; + struct pci_dev *pdev = device->pdev; struct device *dev = &pdev->dev; - struct ioatdma_device *device; - struct dma_device *dma; - - device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); - if (!device) - err = -ENOMEM; - device->pdev = pdev; - device->reg_base = iobase; - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - dma = &device->common; /* DMA coherent memory pool for DMA descriptor allocations */ device->dma_pool = pci_pool_create("dma_desc_pool", pdev, @@ -1572,26 +1517,13 @@ ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) goto err_completion_pool; } - INIT_LIST_HEAD(&dma->channels); ioat_dma_enumerate_channels(device); + dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->device_alloc_chan_resources = ioat_dma_alloc_chan_resources; dma->device_free_chan_resources = ioat_dma_free_chan_resources; - dma->dev = &pdev->dev; - - dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->device_is_tx_complete = ioat_dma_is_complete; - switch (device->version) { - case IOAT_VER_1_2: - dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; - break; - } + dma->dev = &pdev->dev; dev_err(dev, "Intel(R) I/OAT DMA Engine found," " %d channels, device version 0x%02x, driver version %s\n", @@ -1611,19 +1543,7 @@ ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase) if (err) goto err_self_test; - err = dma_async_device_register(dma); - if (err) - goto err_self_test; - - ioat_set_tcp_copy_break(device); - - if (device->version != IOAT_VER_3_0) { - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, - WATCHDOG_DELAY); - } - - return device; + return 0; err_self_test: ioat_disable_interrupts(device); @@ -1632,7 +1552,142 @@ err_setup_interrupts: err_completion_pool: pci_pool_destroy(device->dma_pool); err_dma_pool: - return NULL; + return err; +} + +static int ioat_register(struct ioatdma_device *device) +{ + int err = dma_async_device_register(&device->common); + + if (err) { + ioat_disable_interrupts(device); + pci_pool_destroy(device->completion_pool); + pci_pool_destroy(device->dma_pool); + } + + return err; +} + +/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ +static void ioat1_intr_quirk(struct ioatdma_device *device) +{ + struct pci_dev *pdev = device->pdev; + u32 dmactrl; + + pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + if (pdev->msi_enabled) + dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; + else + dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; + pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); +} + +int ioat1_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + int err; + + device->intr_quirk = ioat1_intr_quirk; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(4096); + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat_dca_init(pdev, device->reg_base); + + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, WATCHDOG_DELAY); + + return err; +} + +int ioat2_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *chan; + struct ioat_dma_chan *ioat_chan; + int err; + + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(2048); + + list_for_each_entry(chan, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(chan); + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat2_dca_init(pdev, device->reg_base); + + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, WATCHDOG_DELAY); + + return err; +} + +int ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *chan; + struct ioat_dma_chan *ioat_chan; + int err; + u16 dev_id; + + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(chan, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(chan); + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return err; } void ioat_dma_remove(struct ioatdma_device *device) diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 6e27ddb1e98..1226e35f270 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -61,6 +61,8 @@ * @version: version of ioatdma device * @msix_entries: irq handlers * @idx: per channel data + * @dca: direct cache access context + * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) */ struct ioatdma_device { @@ -73,6 +75,8 @@ struct ioatdma_device { struct delayed_work work; struct msix_entry msix_entries[4]; struct ioat_dma_chan *idx[4]; + struct dca_provider *dca; + void (*intr_quirk)(struct ioatdma_device *device); }; /** @@ -136,25 +140,16 @@ struct ioat_desc_sw { struct dma_async_tx_descriptor txd; }; -static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) +static inline void ioat_set_tcp_copy_break(unsigned long copybreak) { #ifdef CONFIG_NET_DMA - switch (dev->version) { - case IOAT_VER_1_2: - sysctl_tcp_dma_copybreak = 4096; - break; - case IOAT_VER_2_0: - sysctl_tcp_dma_copybreak = 2048; - break; - case IOAT_VER_3_0: - sysctl_tcp_dma_copybreak = 262144; - break; - } + sysctl_tcp_dma_copybreak = copybreak; #endif } -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase); +int ioat1_dma_probe(struct ioatdma_device *dev, int dca); +int ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int ioat3_dma_probe(struct ioatdma_device *dev, int dca); void ioat_dma_remove(struct ioatdma_device *device); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 982e38fd177..55414d88ac1 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -60,14 +60,8 @@ static struct pci_device_id ioat_pci_tbl[] = { { 0, } }; -struct ioat_device { - struct pci_dev *pdev; - struct ioatdma_device *dma; - struct dca_provider *dca; -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id); +static int __devinit ioat_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id); static void __devexit ioat_remove(struct pci_dev *pdev); static int ioat_dca_enabled = 1; @@ -79,17 +73,28 @@ MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)" static struct pci_driver ioat_pci_driver = { .name = DRV_NAME, .id_table = ioat_pci_tbl, - .probe = ioat_probe, + .probe = ioat_pci_probe, .remove = __devexit_p(ioat_remove), }; -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id) +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { void __iomem * const *iomap; - void __iomem *iobase; struct device *dev = &pdev->dev; - struct ioat_device *device; + struct ioatdma_device *device; int err; err = pcim_enable_device(pdev); @@ -119,33 +124,24 @@ static int __devinit ioat_probe(struct pci_dev *pdev, if (!device) return -ENOMEM; - device->pdev = pdev; - pci_set_drvdata(pdev, device); - iobase = iomap[IOAT_MMIO_BAR]; - pci_set_master(pdev); - switch (readb(iobase + IOAT_VER_OFFSET)) { - case IOAT_VER_1_2: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat_dca_init(pdev, iobase); - break; - case IOAT_VER_2_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat2_dca_init(pdev, iobase); - break; - case IOAT_VER_3_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat3_dca_init(pdev, iobase); - break; - default: + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version == IOAT_VER_1_2) + err = ioat1_dma_probe(device, ioat_dca_enabled); + else if (device->version == IOAT_VER_2_0) + err = ioat2_dma_probe(device, ioat_dca_enabled); + else if (device->version >= IOAT_VER_3_0) + err = ioat3_dma_probe(device, ioat_dca_enabled); + else return -ENODEV; - } - if (!device->dma) { + if (err) { dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); return -ENODEV; } @@ -155,7 +151,10 @@ static int __devinit ioat_probe(struct pci_dev *pdev, static void __devexit ioat_remove(struct pci_dev *pdev) { - struct ioat_device *device = pci_get_drvdata(pdev); + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; dev_err(&pdev->dev, "Removing dma and dca services\n"); if (device->dca) { @@ -163,11 +162,7 @@ static void __devexit ioat_remove(struct pci_dev *pdev) free_dca_provider(device->dca); device->dca = NULL; } - - if (device->dma) { - ioat_dma_remove(device->dma); - device->dma = NULL; - } + ioat_dma_remove(device); } static int __init ioat_init_module(void) -- cgit v1.2.3-70-g09d2 From 5cbafa65b92ee4f5b8ba915cddf94b91f186b989 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 26 Aug 2009 13:01:44 -0700 Subject: ioat2,3: convert to a true ring buffer Replace the current linked list munged into a ring with a native ring buffer implementation. The benefit of this approach is reduced overhead as many parameters can be derived from ring position with simple pointer comparisons and descriptor allocation/freeing becomes just a manipulation of head/tail pointers. It requires a contiguous allocation for the software descriptor information. Since this arrangement is significantly different from the ioat1 chain, move ioat2,3 support into its own file and header. Common routines are exported from driver/dma/ioat/dma.[ch]. Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams --- drivers/dma/ioat/Makefile | 2 +- drivers/dma/ioat/dma.c | 874 ++++++++++------------------------------------ drivers/dma/ioat/dma.h | 50 ++- drivers/dma/ioat/dma_v2.c | 750 +++++++++++++++++++++++++++++++++++++++ drivers/dma/ioat/dma_v2.h | 131 +++++++ drivers/dma/ioat/pci.c | 1 + 6 files changed, 1122 insertions(+), 686 deletions(-) create mode 100644 drivers/dma/ioat/dma_v2.c create mode 100644 drivers/dma/ioat/dma_v2.h (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile index 2ce3d3a4270..205a639e84d 100644 --- a/drivers/dma/ioat/Makefile +++ b/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-objs := pci.o dma.o dca.o +ioatdma-objs := pci.o dma.o dma_v2.o dca.o diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 2e81e0c76e6..64b4d75059a 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -38,28 +38,14 @@ #include "registers.h" #include "hw.h" -static int ioat_pending_level = 4; +int ioat_pending_level = 4; module_param(ioat_pending_level, int, 0644); MODULE_PARM_DESC(ioat_pending_level, "high-water mark for pushing ioat descriptors (default: 4)"); -static void ioat_dma_chan_reset_part2(struct work_struct *work); -static void ioat_dma_chan_watchdog(struct work_struct *work); - /* internal functions */ -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat); -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat); - -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat); -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat); - -static inline struct ioat_chan_common * -ioat_chan_by_index(struct ioatdma_device *device, int index) -{ - return device->idx[index]; -} +static void ioat1_cleanup(struct ioat_dma_chan *ioat); +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); /** * ioat_dma_do_interrupt - handler used for single vector interrupt mode @@ -108,18 +94,38 @@ static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) return IRQ_HANDLED; } -static void ioat_dma_cleanup_tasklet(unsigned long data); +static void ioat1_cleanup_tasklet(unsigned long data); + +/* common channel initialization */ +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + work_func_t work_fn, void (*tasklet)(unsigned long), + unsigned long tasklet_data) +{ + struct dma_device *dma = &device->common; + + chan->device = device; + chan->reg_base = device->reg_base + (0x80 * (idx + 1)); + INIT_DELAYED_WORK(&chan->work, work_fn); + spin_lock_init(&chan->cleanup_lock); + chan->common.device = dma; + list_add_tail(&chan->common.device_node, &dma->channels); + device->idx[idx] = chan; + tasklet_init(&chan->cleanup_task, tasklet, tasklet_data); + tasklet_disable(&chan->cleanup_task); +} + +static void ioat1_reset_part2(struct work_struct *work); /** - * ioat_dma_enumerate_channels - find and initialize the device's channels + * ioat1_dma_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated */ -static int ioat_dma_enumerate_channels(struct ioatdma_device *device) +static int ioat1_enumerate_channels(struct ioatdma_device *device) { u8 xfercap_scale; u32 xfercap; int i; - struct ioat_chan_common *chan; struct ioat_dma_chan *ioat; struct device *dev = &device->pdev->dev; struct dma_device *dma = &device->common; @@ -135,31 +141,20 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) #endif for (i = 0; i < dma->chancnt; i++) { ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) { - dma->chancnt = i; + if (!ioat) break; - } - chan = &ioat->base; - chan->device = device; - chan->reg_base = device->reg_base + (0x80 * (i + 1)); + ioat_init_channel(device, &ioat->base, i, + ioat1_reset_part2, + ioat1_cleanup_tasklet, + (unsigned long) ioat); ioat->xfercap = xfercap; - ioat->desccount = 0; - INIT_DELAYED_WORK(&chan->work, ioat_dma_chan_reset_part2); - spin_lock_init(&chan->cleanup_lock); spin_lock_init(&ioat->desc_lock); INIT_LIST_HEAD(&ioat->free_desc); INIT_LIST_HEAD(&ioat->used_desc); - /* This should be made common somewhere in dmaengine.c */ - chan->common.device = &device->common; - list_add_tail(&chan->common.device_node, &dma->channels); - device->idx[i] = chan; - tasklet_init(&chan->cleanup_task, - ioat_dma_cleanup_tasklet, - (unsigned long) ioat); - tasklet_disable(&chan->cleanup_task); } - return dma->chancnt; + dma->chancnt = i; + return i; } /** @@ -187,35 +182,16 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) } } -static inline void -__ioat2_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) -{ - void __iomem *reg_base = ioat->base.reg_base; - - ioat->pending = 0; - writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); -} - -static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(chan); - - if (ioat->pending > 0) { - spin_lock_bh(&ioat->desc_lock); - __ioat2_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); - } -} - - /** - * ioat_dma_chan_reset_part2 - reinit the channel after a reset + * ioat1_reset_part2 - reinit the channel after a reset */ -static void ioat_dma_chan_reset_part2(struct work_struct *work) +static void ioat1_reset_part2(struct work_struct *work) { struct ioat_chan_common *chan; struct ioat_dma_chan *ioat; struct ioat_desc_sw *desc; + int dmacount; + bool start_null = false; chan = container_of(work, struct ioat_chan_common, work.work); ioat = container_of(chan, struct ioat_dma_chan, base); @@ -226,26 +202,22 @@ static void ioat_dma_chan_reset_part2(struct work_struct *work) chan->completion_virt->high = 0; ioat->pending = 0; - /* - * count the descriptors waiting, and be sure to do it - * right for both the CB1 line and the CB2 ring - */ - ioat->dmacount = 0; + /* count the descriptors waiting */ + dmacount = 0; if (ioat->used_desc.prev) { desc = to_ioat_desc(ioat->used_desc.prev); do { - ioat->dmacount++; + dmacount++; desc = to_ioat_desc(desc->node.next); } while (&desc->node != ioat->used_desc.next); } - /* - * write the new starting descriptor address - * this puts channel engine into ARMED state - */ - desc = to_ioat_desc(ioat->used_desc.prev); - switch (chan->device->version) { - case IOAT_VER_1_2: + if (dmacount) { + /* + * write the new starting descriptor address + * this puts channel engine into ARMED state + */ + desc = to_ioat_desc(ioat->used_desc.prev); writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); writel(((u64) desc->txd.phys) >> 32, @@ -253,32 +225,24 @@ static void ioat_dma_chan_reset_part2(struct work_struct *work) writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - break; - case IOAT_VER_2_0: - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - /* tell the engine to go with what's left to be done */ - writew(ioat->dmacount, - chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + } else + start_null = true; + spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&chan->cleanup_lock); - break; - } dev_err(to_dev(chan), "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, ioat->desccount); + chan_num(chan), dmacount, ioat->desccount); - spin_unlock_bh(&ioat->desc_lock); - spin_unlock_bh(&chan->cleanup_lock); + if (start_null) + ioat1_dma_start_null_desc(ioat); } /** - * ioat_dma_reset_channel - restart a channel + * ioat1_reset_channel - restart a channel * @ioat: IOAT DMA channel handle */ -static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat) +static void ioat1_reset_channel(struct ioat_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; void __iomem *reg_base = chan->reg_base; @@ -316,9 +280,9 @@ static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat) } /** - * ioat_dma_chan_watchdog - watch for stuck channels + * ioat1_chan_watchdog - watch for stuck channels */ -static void ioat_dma_chan_watchdog(struct work_struct *work) +static void ioat1_chan_watchdog(struct work_struct *work) { struct ioatdma_device *device = container_of(work, struct ioatdma_device, work.work); @@ -339,16 +303,15 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) chan = ioat_chan_by_index(device, i); ioat = container_of(chan, struct ioat_dma_chan, base); - if (chan->device->version == IOAT_VER_1_2 - /* have we started processing anything yet */ - && chan->last_completion - /* have we completed any since last watchdog cycle? */ + if (/* have we started processing anything yet */ + chan->last_completion + /* have we completed any since last watchdog cycle? */ && (chan->last_completion == chan->watchdog_completion) - /* has TCP stuck on one cookie since last watchdog? */ + /* has TCP stuck on one cookie since last watchdog? */ && (chan->watchdog_tcp_cookie == chan->watchdog_last_tcp_cookie) && (chan->watchdog_tcp_cookie != chan->completed_cookie) - /* is there something in the chain to be processed? */ - /* CB1 chain always has at least the last one processed */ + /* is there something in the chain to be processed? */ + /* CB1 chain always has at least the last one processed */ && (ioat->used_desc.prev != ioat->used_desc.next) && ioat->pending == 0) { @@ -387,34 +350,15 @@ static void ioat_dma_chan_watchdog(struct work_struct *work) chan->completion_virt->low = completion_hw.low; chan->completion_virt->high = completion_hw.high; } else { - ioat_dma_reset_channel(ioat); + ioat1_reset_channel(ioat); chan->watchdog_completion = 0; chan->last_compl_desc_addr_hw = 0; } - - /* - * for version 2.0 if there are descriptors yet to be processed - * and the last completed hasn't changed since the last watchdog - * if they haven't hit the pending level - * issue the pending to push them through - * else - * try resetting the channel - */ - } else if (chan->device->version == IOAT_VER_2_0 - && ioat->used_desc.prev - && chan->last_completion - && chan->last_completion == chan->watchdog_completion) { - - if (ioat->pending < ioat_pending_level) - ioat2_dma_memcpy_issue_pending(&chan->common); - else { - ioat_dma_reset_channel(ioat); - chan->watchdog_completion = 0; - } } else { chan->last_compl_desc_addr_hw = 0; chan->watchdog_completion = chan->last_completion; } + chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; } @@ -447,7 +391,6 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) chain_tail->hw->next = first->txd.phys; list_splice_tail_init(&tx->tx_list, &ioat->used_desc); - ioat->dmacount += desc->tx_cnt; ioat->pending += desc->tx_cnt; if (ioat->pending >= ioat_pending_level) __ioat1_dma_memcpy_issue_pending(ioat); @@ -456,92 +399,6 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) return cookie; } -static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->txd.flags; - new = first; - - /* - * ioat->desc_lock is still in force in version 2 path - * it gets unlocked at end of this function - */ - do { - copy = min_t(size_t, len, ioat->xfercap); - - async_tx_ack(&new->txd); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - desc_count++; - } while (len && (new = ioat2_dma_get_next_descriptor(ioat))); - - if (!new) { - dev_err(to_dev(&ioat->base), "tx submit failed\n"); - spin_unlock_bh(&ioat->desc_lock); - return -ENOMEM; - } - - hw->ctl_f.compl_write = 1; - if (first->txd.callback) { - hw->ctl_f.int_en = 1; - if (first != new) { - /* move callback into to last desc */ - new->txd.callback = first->txd.callback; - new->txd.callback_param - = first->txd.callback_param; - first->txd.callback = NULL; - first->txd.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->txd.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat->base.common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat->base.common.cookie = new->txd.cookie = cookie; - - ioat->dmacount += desc_count; - ioat->pending += desc_count; - if (ioat->pending >= ioat_pending_level) - __ioat2_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); - - return cookie; -} - /** * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair * @ioat: the channel supplying the memory pool for the descriptors @@ -567,17 +424,9 @@ ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) } memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); - switch (ioatdma_device->version) { - case IOAT_VER_1_2: - desc_sw->txd.tx_submit = ioat1_tx_submit; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - desc_sw->txd.tx_submit = ioat2_tx_submit; - break; - } + dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); + desc_sw->txd.tx_submit = ioat1_tx_submit; desc_sw->hw = desc; desc_sw->txd.phys = phys; @@ -587,39 +436,12 @@ ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) static int ioat_initial_desc_count = 256; module_param(ioat_initial_desc_count, int, 0644); MODULE_PARM_DESC(ioat_initial_desc_count, - "initial descriptors per channel (default: 256)"); - -/** - * ioat2_dma_massage_chan_desc - link the descriptors into a circle - * @ioat: the channel to be massaged - */ -static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat) -{ - struct ioat_desc_sw *desc, *_desc; - - /* setup used_desc */ - ioat->used_desc.next = ioat->free_desc.next; - ioat->used_desc.prev = NULL; - - /* pull free_desc out of the circle so that every node is a hw - * descriptor, but leave it pointing to the list - */ - ioat->free_desc.prev->next = ioat->free_desc.next; - ioat->free_desc.next->prev = ioat->free_desc.prev; - - /* circle link the hw descriptors */ - desc = to_ioat_desc(ioat->free_desc.next); - desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; - list_for_each_entry_safe(desc, _desc, ioat->free_desc.next, node) { - desc->hw->next = to_ioat_desc(desc->node.next)->txd.phys; - } -} - + "ioat1: initial descriptors per channel (default: 256)"); /** - * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors + * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors * @chan: the channel to be filled out */ -static int ioat_dma_alloc_chan_resources(struct dma_chan *c) +static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) { struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_chan_common *chan = &ioat->base; @@ -657,8 +479,6 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *c) spin_lock_bh(&ioat->desc_lock); ioat->desccount = i; list_splice(&tmp_list, &ioat->free_desc); - if (chan->device->version != IOAT_VER_1_2) - ioat2_dma_massage_chan_desc(ioat); spin_unlock_bh(&ioat->desc_lock); /* allocate a completion writeback area */ @@ -674,15 +494,15 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *c) chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); tasklet_enable(&chan->cleanup_task); - ioat_dma_start_null_desc(ioat); /* give chain to dma device */ + ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ return ioat->desccount; } /** - * ioat_dma_free_chan_resources - release all the descriptors + * ioat1_dma_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned */ -static void ioat_dma_free_chan_resources(struct dma_chan *c) +static void ioat1_dma_free_chan_resources(struct dma_chan *c) { struct ioat_dma_chan *ioat = to_ioat_chan(c); struct ioat_chan_common *chan = &ioat->base; @@ -697,7 +517,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *c) return; tasklet_disable(&chan->cleanup_task); - ioat_dma_memcpy_cleanup(ioat); + ioat1_cleanup(ioat); /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. @@ -707,40 +527,20 @@ static void ioat_dma_free_chan_resources(struct dma_chan *c) mdelay(100); spin_lock_bh(&ioat->desc_lock); - switch (chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat->used_desc, node) { - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - list_for_each_entry_safe(desc, _desc, - ioat->free_desc.next, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - desc = to_ioat_desc(ioat->free_desc.next); + list_for_each_entry_safe(desc, _desc, + &ioat->used_desc, node) { + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat->free_desc, node) { + list_del(&desc->node); pci_pool_free(ioatdma_device->dma_pool, desc->hw, desc->txd.phys); kfree(desc); - INIT_LIST_HEAD(&ioat->free_desc); - INIT_LIST_HEAD(&ioat->used_desc); - break; } spin_unlock_bh(&ioat->desc_lock); @@ -758,7 +558,6 @@ static void ioat_dma_free_chan_resources(struct dma_chan *c) chan->last_compl_desc_addr_hw = 0; chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0; ioat->pending = 0; - ioat->dmacount = 0; ioat->desccount = 0; } @@ -791,86 +590,6 @@ ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) return new; } -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat) -{ - struct ioat_desc_sw *new; - - /* - * used.prev points to where to start processing - * used.next points to next free descriptor - * if used.prev == NULL, there are none waiting to be processed - * if used.next == used.prev.prev, there is only one free descriptor, - * and we need to use it to as a noop descriptor before - * linking in a new set of descriptors, since the device - * has probably already read the pointer to it - */ - if (ioat->used_desc.prev && - ioat->used_desc.next == ioat->used_desc.prev->prev) { - - struct ioat_desc_sw *desc; - struct ioat_desc_sw *noop_desc; - int i; - - /* set up the noop descriptor */ - noop_desc = to_ioat_desc(ioat->used_desc.next); - /* set size to non-zero value (channel returns error when size is 0) */ - noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; - noop_desc->hw->ctl = 0; - noop_desc->hw->ctl_f.null = 1; - noop_desc->hw->src_addr = 0; - noop_desc->hw->dst_addr = 0; - - ioat->used_desc.next = ioat->used_desc.next->next; - ioat->pending++; - ioat->dmacount++; - - /* try to get a few more descriptors */ - for (i = 16; i; i--) { - desc = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); - if (!desc) { - dev_err(to_dev(&ioat->base), - "alloc failed\n"); - break; - } - list_add_tail(&desc->node, ioat->used_desc.next); - - desc->hw->next - = to_ioat_desc(desc->node.next)->txd.phys; - to_ioat_desc(desc->node.prev)->hw->next - = desc->txd.phys; - ioat->desccount++; - } - - ioat->used_desc.next = noop_desc->node.next; - } - new = to_ioat_desc(ioat->used_desc.next); - prefetch(new); - ioat->used_desc.next = new->node.next; - - if (ioat->used_desc.prev == NULL) - ioat->used_desc.prev = &new->node; - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw * -ioat_dma_get_next_descriptor(struct ioat_dma_chan *ioat) -{ - if (!ioat) - return NULL; - - switch (ioat->base.device->version) { - case IOAT_VER_1_2: - return ioat1_dma_get_next_descriptor(ioat); - case IOAT_VER_2_0: - case IOAT_VER_3_0: - return ioat2_dma_get_next_descriptor(ioat); - } - return NULL; -} - static struct dma_async_tx_descriptor * ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) @@ -886,7 +605,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, int tx_cnt = 0; spin_lock_bh(&ioat->desc_lock); - desc = ioat_dma_get_next_descriptor(ioat); + desc = ioat1_dma_get_next_descriptor(ioat); do { if (!desc) break; @@ -909,7 +628,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, struct ioat_desc_sw *next; async_tx_ack(&desc->txd); - next = ioat_dma_get_next_descriptor(ioat); + next = ioat1_dma_get_next_descriptor(ioat); hw->next = next ? next->txd.phys : 0; desc = next; } else @@ -920,8 +639,7 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, struct ioat_chan_common *chan = &ioat->base; dev_err(to_dev(chan), - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, ioat->desccount); + "chan%d - get_next_desc failed\n", chan_num(chan)); list_splice(&chain, &ioat->free_desc); spin_unlock_bh(&ioat->desc_lock); return NULL; @@ -940,94 +658,43 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, return &desc->txd; } -static struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat->desc_lock); - new = ioat2_dma_get_next_descriptor(ioat); - - /* - * leave ioat->desc_lock set in ioat 2 path - * it will get unlocked at end of tx_submit - */ - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->txd.flags = flags; - return &new->txd; - } else { - struct ioat_chan_common *chan = &ioat->base; - - spin_unlock_bh(&ioat->desc_lock); - dev_err(to_dev(chan), - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(chan), ioat->dmacount, ioat->desccount); - return NULL; - } -} - -static void ioat_dma_cleanup_tasklet(unsigned long data) +static void ioat1_cleanup_tasklet(unsigned long data) { struct ioat_dma_chan *chan = (void *)data; - ioat_dma_memcpy_cleanup(chan); + ioat1_cleanup(chan); writew(IOAT_CHANCTRL_INT_DISABLE, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); } -static void -ioat_dma_unmap(struct ioat_chan_common *chan, struct ioat_desc_sw *desc) +static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, + int direction, enum dma_ctrl_flags flags, bool dst) { - if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - pci_unmap_single(chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - else - pci_unmap_page(chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - } - - if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - pci_unmap_single(chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - else - pci_unmap_page(chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - } + if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || + (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) + pci_unmap_single(pdev, addr, len, direction); + else + pci_unmap_page(pdev, addr, len, direction); } -/** - * ioat_dma_memcpy_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - */ -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) + +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw) { - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; - struct ioat_desc_sw *desc, *_desc; - dma_cookie_t cookie = 0; - unsigned long desc_phys; - struct ioat_desc_sw *latest_desc; - struct dma_async_tx_descriptor *tx; + struct pci_dev *pdev = chan->device->pdev; + size_t offset = len - hw->size; - prefetch(chan->completion_virt); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) + ioat_unmap(pdev, hw->src_addr - offset, len, + PCI_DMA_TODEVICE, flags, 0); +} + +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) +{ + unsigned long phys_complete; /* The completion writeback can happen at any time, so reads by the driver need to be atomic operations @@ -1051,18 +718,37 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) /* TODO do something to salvage the situation */ } + return phys_complete; +} + +/** + * ioat1_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + struct ioat_desc_sw *desc, *_desc; + dma_cookie_t cookie = 0; + struct dma_async_tx_descriptor *tx; + + prefetch(chan->completion_virt); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + phys_complete = ioat_get_current_completion(chan); if (phys_complete == chan->last_completion) { spin_unlock_bh(&chan->cleanup_lock); /* * perhaps we're stuck so hard that the watchdog can't go off? * try to catch it after 2 seconds */ - if (chan->device->version != IOAT_VER_3_0) { - if (time_after(jiffies, - chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat_dma_chan_watchdog(&(chan->device->work.work)); - chan->last_completion_time = jiffies; - } + if (time_after(jiffies, + chan->last_completion_time + HZ*WATCHDOG_DELAY)) { + ioat1_chan_watchdog(&(chan->device->work.work)); + chan->last_completion_time = jiffies; } return; } @@ -1074,91 +760,42 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) return; } - switch (chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { - tx = &desc->txd; - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. - */ - if (tx->cookie) { - cookie = tx->cookie; - ioat_dma_unmap(chan, desc); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } + list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { + tx = &desc->txd; + /* + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. + */ + if (tx->cookie) { + cookie = tx->cookie; + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } + } - if (tx->phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(tx)) { - list_move_tail(&desc->node, - &ioat->free_desc); - } else - tx->cookie = 0; - } else { - /* - * last used desc. Do not remove, so we can - * append from it, but don't look at it next - * time, either - */ + if (tx->phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (async_tx_test_ack(tx)) + list_move_tail(&desc->node, &ioat->free_desc); + else tx->cookie = 0; + } else { + /* + * last used desc. Do not remove, so we can + * append from it, but don't look at it next + * time, either + */ + tx->cookie = 0; - /* TODO check status bits? */ - break; - } - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - /* has some other thread has already cleaned up? */ - if (ioat->used_desc.prev == NULL) + /* TODO check status bits? */ break; - - /* work backwards to find latest finished desc */ - desc = to_ioat_desc(ioat->used_desc.next); - tx = &desc->txd; - latest_desc = NULL; - do { - desc = to_ioat_desc(desc->node.prev); - desc_phys = (unsigned long)tx->phys - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; - if (desc_phys == phys_complete) { - latest_desc = desc; - break; - } - } while (&desc->node != ioat->used_desc.prev); - - if (latest_desc != NULL) { - /* work forwards to clear finished descriptors */ - for (desc = to_ioat_desc(ioat->used_desc.prev); - &desc->node != latest_desc->node.next && - &desc->node != ioat->used_desc.next; - desc = to_ioat_desc(desc->node.next)) { - if (tx->cookie) { - cookie = tx->cookie; - tx->cookie = 0; - ioat_dma_unmap(chan, desc); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - } - - /* move used.prev up beyond those that are finished */ - if (&desc->node == ioat->used_desc.next) - ioat->used_desc.prev = NULL; - else - ioat->used_desc.prev = &desc->node; } - break; } spin_unlock_bh(&ioat->desc_lock); @@ -1170,50 +807,21 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat) spin_unlock_bh(&chan->cleanup_lock); } -/** - * ioat_dma_is_complete - poll the status of a IOAT DMA transaction - * @chan: IOAT DMA channel handle - * @cookie: DMA transaction identifier - * @done: if not %NULL, updated with last completed transaction - * @used: if not %NULL, updated with last used transaction - */ static enum dma_status -ioat_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, - dma_cookie_t *done, dma_cookie_t *used) +ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) { struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - dma_cookie_t last_used; - dma_cookie_t last_complete; - enum dma_status ret; - - last_used = c->cookie; - last_complete = chan->completed_cookie; - chan->watchdog_tcp_cookie = cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - ret = dma_async_is_complete(cookie, last_complete, last_used); - if (ret == DMA_SUCCESS) - return ret; - ioat_dma_memcpy_cleanup(ioat); + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; - last_used = c->cookie; - last_complete = chan->completed_cookie; + ioat1_cleanup(ioat); - if (done) - *done = last_complete; - if (used) - *used = last_used; - - return dma_async_is_complete(cookie, last_complete, last_used); + return ioat_is_complete(c, cookie, done, used); } -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat) +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; struct ioat_desc_sw *desc; @@ -1221,7 +829,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat) spin_lock_bh(&ioat->desc_lock); - desc = ioat_dma_get_next_descriptor(ioat); + desc = ioat1_dma_get_next_descriptor(ioat); if (!desc) { dev_err(to_dev(chan), @@ -1240,30 +848,16 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat) hw->src_addr = 0; hw->dst_addr = 0; async_tx_ack(&desc->txd); - switch (chan->device->version) { - case IOAT_VER_1_2: - hw->next = 0; - list_add_tail(&desc->node, &ioat->used_desc); + hw->next = 0; + list_add_tail(&desc->node, &ioat->used_desc); - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, chan->reg_base - + IOAT_CHANCMD_OFFSET(chan->device->version)); - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->txd.phys) >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->txd.phys) >> 32, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - ioat->dmacount++; - __ioat2_dma_memcpy_issue_pending(ioat); - break; - } + writeb(IOAT_CHANCMD_START, chan->reg_base + + IOAT_CHANCMD_OFFSET(chan->device->version)); spin_unlock_bh(&ioat->desc_lock); } @@ -1484,7 +1078,7 @@ static void ioat_disable_interrupts(struct ioatdma_device *device) writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); } -static int ioat_probe(struct ioatdma_device *device) +int ioat_probe(struct ioatdma_device *device) { int err = -ENODEV; struct dma_device *dma = &device->common; @@ -1503,17 +1097,15 @@ static int ioat_probe(struct ioatdma_device *device) device->completion_pool = pci_pool_create("completion_pool", pdev, sizeof(u64), SMP_CACHE_BYTES, SMP_CACHE_BYTES); + if (!device->completion_pool) { err = -ENOMEM; goto err_completion_pool; } - ioat_dma_enumerate_channels(device); + device->enumerate_channels(device); dma_cap_set(DMA_MEMCPY, dma->cap_mask); - dma->device_alloc_chan_resources = ioat_dma_alloc_chan_resources; - dma->device_free_chan_resources = ioat_dma_free_chan_resources; - dma->device_is_tx_complete = ioat_dma_is_complete; dma->dev = &pdev->dev; dev_err(dev, "Intel(R) I/OAT DMA Engine found," @@ -1546,7 +1138,7 @@ err_dma_pool: return err; } -static int ioat_register(struct ioatdma_device *device) +int ioat_register(struct ioatdma_device *device) { int err = dma_async_device_register(&device->common); @@ -1580,9 +1172,13 @@ int ioat1_dma_probe(struct ioatdma_device *device, int dca) int err; device->intr_quirk = ioat1_intr_quirk; + device->enumerate_channels = ioat1_enumerate_channels; dma = &device->common; dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; + dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; + dma->device_free_chan_resources = ioat1_dma_free_chan_resources; + dma->device_is_tx_complete = ioat1_dma_is_complete; err = ioat_probe(device); if (err) @@ -1594,93 +1190,12 @@ int ioat1_dma_probe(struct ioatdma_device *device, int dca) if (dca) device->dca = ioat_dca_init(pdev, device->reg_base); - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, WATCHDOG_DELAY); - - return err; -} - -int ioat2_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; - - err = ioat_probe(device); - if (err) - return err; - ioat_set_tcp_copy_break(2048); - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - if (dca) - device->dca = ioat2_dca_init(pdev, device->reg_base); - - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + INIT_DELAYED_WORK(&device->work, ioat1_chan_watchdog); schedule_delayed_work(&device->work, WATCHDOG_DELAY); return err; } -int ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - u16 dev_id; - - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - dma->device_issue_pending = ioat2_dma_memcpy_issue_pending; - - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - - err = ioat_probe(device); - if (err) - return err; - ioat_set_tcp_copy_break(262144); - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return err; -} - void ioat_dma_remove(struct ioatdma_device *device) { struct dma_device *dma = &device->common; @@ -1697,4 +1212,3 @@ void ioat_dma_remove(struct ioatdma_device *device) INIT_LIST_HEAD(&dma->channels); } - diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 5b31db73ad8..84065dfa4d4 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -62,6 +62,7 @@ * @idx: per channel data * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) + * @enumerate_channels: hw version specific channel enumeration */ struct ioatdma_device { @@ -76,6 +77,7 @@ struct ioatdma_device { struct ioat_chan_common *idx[4]; struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); + int (*enumerate_channels)(struct ioatdma_device *device); }; struct ioat_chan_common { @@ -106,6 +108,7 @@ struct ioat_chan_common { struct tasklet_struct cleanup_task; }; + /** * struct ioat_dma_chan - internal representation of a DMA channel */ @@ -119,7 +122,6 @@ struct ioat_dma_chan { struct list_head used_desc; int pending; - u16 dmacount; u16 desccount; }; @@ -135,6 +137,33 @@ static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) return container_of(chan, struct ioat_dma_chan, base); } +/** + * ioat_is_complete - poll the status of an ioat transaction + * @c: channel handle + * @cookie: transaction identifier + * @done: if set, updated with last completed transaction + * @used: if set, updated with last used transaction + */ +static inline enum dma_status +ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat_chan_common *chan = to_chan_common(c); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + last_used = c->cookie; + last_complete = chan->completed_cookie; + chan->watchdog_tcp_cookie = cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + /* wrapper around hardware descriptor format + additional software fields */ /** @@ -162,11 +191,22 @@ static inline void ioat_set_tcp_copy_break(unsigned long copybreak) #endif } +static inline struct ioat_chan_common * +ioat_chan_by_index(struct ioatdma_device *device, int index) +{ + return device->idx[index]; +} + +int ioat_probe(struct ioatdma_device *device); +int ioat_register(struct ioatdma_device *device); int ioat1_dma_probe(struct ioatdma_device *dev, int dca); -int ioat2_dma_probe(struct ioatdma_device *dev, int dca); -int ioat3_dma_probe(struct ioatdma_device *dev, int dca); void ioat_dma_remove(struct ioatdma_device *device); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + work_func_t work_fn, void (*tasklet)(unsigned long), + unsigned long tasklet_data); +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw); #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c new file mode 100644 index 00000000000..49ba1c73d95 --- /dev/null +++ b/drivers/dma/ioat/dma_v2.c @@ -0,0 +1,750 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine (versions >= 2), which + * does asynchronous data movement and checksumming operations. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +static int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); + +static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) +{ + void * __iomem reg_base = ioat->base.reg_base; + + ioat->pending = 0; + ioat->dmacount += ioat2_ring_pending(ioat); + ioat->issued = ioat->head; + /* make descriptor updates globally visible before notifying channel */ + wmb(); + writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + +} + +static void ioat2_issue_pending(struct dma_chan *chan) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); + + spin_lock_bh(&ioat->ring_lock); + if (ioat->pending == 1) + __ioat2_issue_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +/** + * ioat2_update_pending - log pending descriptors + * @ioat: ioat2+ channel + * + * set pending to '1' unless pending is already set to '2', pending == 2 + * indicates that submission is temporarily blocked due to an in-flight + * reset. If we are already above the ioat_pending_level threshold then + * just issue pending. + * + * called with ring_lock held + */ +static void ioat2_update_pending(struct ioat2_dma_chan *ioat) +{ + if (unlikely(ioat->pending == 2)) + return; + else if (ioat2_ring_pending(ioat) > ioat_pending_level) + __ioat2_issue_pending(ioat); + else + ioat->pending = 1; +} + +static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + void __iomem *reg_base = ioat->base.reg_base; + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + int idx; + + if (ioat2_ring_space(ioat) < 1) { + dev_err(to_dev(&ioat->base), + "Unable to start null desc - ring full\n"); + return; + } + + idx = ioat2_desc_alloc(ioat, 1); + desc = ioat2_get_ring_ent(ioat, idx); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, + reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->txd.phys) >> 32, + reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + __ioat2_issue_pending(ioat); +} + +static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + spin_lock_bh(&ioat->ring_lock); + __ioat2_start_null_desc(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +static void ioat2_cleanup(struct ioat2_dma_chan *ioat); + +/** + * ioat2_reset_part2 - reinit the channel after a reset + */ +static void ioat2_reset_part2(struct work_struct *work) +{ + struct ioat_chan_common *chan; + struct ioat2_dma_chan *ioat; + + chan = container_of(work, struct ioat_chan_common, work.work); + ioat = container_of(chan, struct ioat2_dma_chan, base); + + /* ensure that ->tail points to the stalled descriptor + * (ioat->pending is set to 2 at this point so no new + * descriptors will be issued while we perform this cleanup) + */ + ioat2_cleanup(ioat); + + spin_lock_bh(&chan->cleanup_lock); + spin_lock_bh(&ioat->ring_lock); + + /* set the tail to be re-issued */ + ioat->issued = ioat->tail; + ioat->dmacount = 0; + + if (ioat2_ring_pending(ioat)) { + struct ioat_ring_ent *desc; + + desc = ioat2_get_ring_ent(ioat, ioat->tail); + writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->txd.phys) >> 32, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + __ioat2_issue_pending(ioat); + } else + __ioat2_start_null_desc(ioat); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); + + dev_info(to_dev(chan), + "chan%d reset - %d descs waiting, %d total desc\n", + chan_num(chan), ioat->dmacount, 1 << ioat->alloc_order); +} + +/** + * ioat2_reset_channel - restart a channel + * @ioat: IOAT DMA channel handle + */ +static void ioat2_reset_channel(struct ioat2_dma_chan *ioat) +{ + u32 chansts, chanerr; + struct ioat_chan_common *chan = &ioat->base; + u16 active; + + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + spin_unlock_bh(&ioat->ring_lock); + if (!active) + return; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + chansts = (chan->completion_virt->low + & IOAT_CHANSTS_DMA_TRANSFER_STATUS); + if (chanerr) { + dev_err(to_dev(chan), + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(chan), chansts, chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + spin_lock_bh(&ioat->ring_lock); + ioat->pending = 2; + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + + IOAT_CHANCMD_OFFSET(chan->device->version)); + spin_unlock_bh(&ioat->ring_lock); + schedule_delayed_work(&chan->work, RESET_DELAY); +} + +/** + * ioat2_chan_watchdog - watch for stuck channels + */ +static void ioat2_chan_watchdog(struct work_struct *work) +{ + struct ioatdma_device *device = + container_of(work, struct ioatdma_device, work.work); + struct ioat2_dma_chan *ioat; + struct ioat_chan_common *chan; + u16 active; + int i; + + for (i = 0; i < device->common.chancnt; i++) { + chan = ioat_chan_by_index(device, i); + ioat = container_of(chan, struct ioat2_dma_chan, base); + + /* + * for version 2.0 if there are descriptors yet to be processed + * and the last completed hasn't changed since the last watchdog + * if they haven't hit the pending level + * issue the pending to push them through + * else + * try resetting the channel + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + spin_unlock_bh(&ioat->ring_lock); + + if (active && + chan->last_completion && + chan->last_completion == chan->watchdog_completion) { + + if (ioat->pending == 1) + ioat2_issue_pending(&chan->common); + else { + ioat2_reset_channel(ioat); + chan->watchdog_completion = 0; + } + } else { + chan->last_compl_desc_addr_hw = 0; + chan->watchdog_completion = chan->last_completion; + } + chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie; + } + schedule_delayed_work(&device->work, WATCHDOG_DELAY); +} + +/** + * ioat2_cleanup - clean finished descriptors (advance tail pointer) + * @chan: ioat channel to be cleaned up + */ +static void ioat2_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + struct dma_async_tx_descriptor *tx; + + prefetch(chan->completion_virt); + + spin_lock_bh(&chan->cleanup_lock); + phys_complete = ioat_get_current_completion(chan); + if (phys_complete == chan->last_completion) { + spin_unlock_bh(&chan->cleanup_lock); + /* + * perhaps we're stuck so hard that the watchdog can't go off? + * try to catch it after WATCHDOG_DELAY seconds + */ + if (chan->device->version < IOAT_VER_3_0) { + unsigned long tmo; + + tmo = chan->last_completion_time + HZ*WATCHDOG_DELAY; + if (time_after(jiffies, tmo)) { + ioat2_chan_watchdog(&(chan->device->work.work)); + chan->last_completion_time = jiffies; + } + } + return; + } + chan->last_completion_time = jiffies; + + spin_lock_bh(&ioat->ring_lock); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + tx = &desc->txd; + if (tx->cookie) { + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + chan->completed_cookie = tx->cookie; + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + spin_unlock_bh(&ioat->ring_lock); + + chan->last_completion = phys_complete; + + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat2_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat2_cleanup(ioat); + writew(IOAT_CHANCTRL_INT_DISABLE, + ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +/** + * ioat2_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +static int ioat2_enumerate_channels(struct ioatdma_device *device) +{ + struct ioat2_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + if (xfercap_log == 0) + return 0; + + /* FIXME which i/oat version is i7300? */ +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i, + ioat2_reset_part2, + ioat2_cleanup_tasklet, + (unsigned long) ioat); + ioat->xfercap_log = xfercap_log; + spin_lock_init(&ioat->ring_lock); + } + dma->chancnt = i; + return i; +} + +static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + dma_cookie_t cookie = c->cookie; + + cookie++; + if (cookie < 0) + cookie = 1; + tx->cookie = cookie; + c->cookie = cookie; + ioat2_update_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); + + return cookie; +} + +static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) +{ + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *dma; + dma_addr_t phys; + + dma = to_ioatdma_device(chan->device); + hw = pci_pool_alloc(dma->dma_pool, GFP_KERNEL, &phys); + if (!hw) + return NULL; + memset(hw, 0, sizeof(*hw)); + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + pci_pool_free(dma->dma_pool, hw, phys); + return NULL; + } + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat2_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} + +static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *dma; + + dma = to_ioatdma_device(chan->device); + pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); + kfree(desc); +} + +/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring + * @chan: channel to be initialized + */ +static int ioat2_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent **ring; + u16 chanctrl; + u32 chanerr; + int descs; + int i; + + /* have we already been set up? */ + if (ioat->ring) + return 1 << ioat->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + chanctrl = IOAT_CHANCTRL_ERR_INT_EN | IOAT_CHANCTRL_ANY_ERR_ABORT_EN | + IOAT_CHANCTRL_ERR_COMPLETION_EN; + writew(chanctrl, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion_virt = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, + &chan->completion_addr); + if (!chan->completion_virt) + return -ENOMEM; + + memset(chan->completion_virt, 0, + sizeof(*chan->completion_virt)); + writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_addr) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + ioat->alloc_order = ioat_get_alloc_order(); + descs = 1 << ioat->alloc_order; + + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), GFP_KERNEL); + if (!ring) + return -ENOMEM; + for (i = 0; i < descs; i++) { + ring[i] = ioat2_alloc_ring_ent(c); + if (!ring[i]) { + while (i--) + ioat2_free_ring_ent(ring[i], c); + kfree(ring); + return -ENOMEM; + } + } + + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + spin_lock_bh(&ioat->ring_lock); + ioat->ring = ring; + ioat->head = 0; + ioat->issued = 0; + ioat->tail = 0; + ioat->pending = 0; + spin_unlock_bh(&ioat->ring_lock); + + tasklet_enable(&chan->cleanup_task); + ioat2_start_null_desc(ioat); + + return descs; +} + +/** + * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops + * @idx: gets starting descriptor index on successful allocation + * @ioat: ioat2,3 channel (ring) to operate on + * @num_descs: allocation length + */ +static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +{ + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&ioat->ring_lock); + if (unlikely(ioat2_ring_space(ioat) < num_descs)) { + if (printk_ratelimit()) + dev_dbg(to_dev(chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, + ioat->issued); + spin_unlock_bh(&ioat->ring_lock); + + /* do direct reclaim in the allocation failure case */ + ioat2_cleanup(ioat); + + return -ENOMEM; + } + + dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + + *idx = ioat2_desc_alloc(ioat, num_descs); + return 0; /* with ioat->ring_lock held */ +} + +static struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs; + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + for (i = 0; i < num_descs; i++) { + size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + } + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + +/** + * ioat2_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat2_free_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *ioatdma_device = chan->device; + struct ioat_ring_ent *desc; + const u16 total_descs = 1 << ioat->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat->ring) + return; + + tasklet_disable(&chan->cleanup_task); + ioat2_cleanup(ioat); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat->ring_lock); + descs = ioat2_ring_space(ioat); + for (i = 0; i < descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->head + i); + ioat2_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + ioat2_free_ring_ent(desc, c); + } + + kfree(ioat->ring); + ioat->ring = NULL; + ioat->alloc_order = 0; + pci_pool_free(ioatdma_device->completion_pool, + chan->completion_virt, + chan->completion_addr); + spin_unlock_bh(&ioat->ring_lock); + + chan->last_completion = 0; + chan->completion_addr = 0; + ioat->pending = 0; + ioat->dmacount = 0; + chan->watchdog_completion = 0; + chan->last_compl_desc_addr_hw = 0; + chan->watchdog_tcp_cookie = 0; + chan->watchdog_last_tcp_cookie = 0; +} + +static enum dma_status +ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat2_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +int ioat2_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + + device->enumerate_channels = ioat2_enumerate_channels; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat2_is_complete; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(2048); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat2_dca_init(pdev, device->reg_base); + + INIT_DELAYED_WORK(&device->work, ioat2_chan_watchdog); + schedule_delayed_work(&device->work, WATCHDOG_DELAY); + + return err; +} + +int ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + u16 dev_id; + + device->enumerate_channels = ioat2_enumerate_channels; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat2_is_complete; + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return err; +} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h new file mode 100644 index 00000000000..94a553eacdb --- /dev/null +++ b/drivers/dma/ioat/dma_v2.h @@ -0,0 +1,131 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_V2_H +#define IOATDMA_V2_H + +#include +#include "dma.h" +#include "hw.h" + + +extern int ioat_pending_level; + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) + +/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes + * @base: common ioat channel parameters + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @pending: lock free indicator for issued != head + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @ring: software ring buffer implementation of hardware ring + * @ring_lock: protects ring attributes + */ +struct ioat2_dma_chan { + struct ioat_chan_common base; + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + int pending; + struct ioat_ring_ent **ring; + spinlock_t ring_lock; +}; + +static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat2_dma_chan, base); +} + +static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat) +{ + return (1 << ioat->alloc_order) - 1; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat); +} + +static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat) +{ + u16 num_descs = ioat2_ring_mask(ioat) + 1; + u16 active = ioat2_ring_active(ioat); + + BUG_ON(active > num_descs); + + return num_descs - active; +} + +/* assumes caller already checked space */ +static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len) +{ + ioat->head += len; + return ioat->head - len; +} + +static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) +{ + u16 num_descs = len >> ioat->xfercap_log; + + num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); + return num_descs; +} + +struct ioat_ring_ent { + struct ioat_dma_descriptor *hw; + struct dma_async_tx_descriptor txd; + size_t len; +}; + +static inline struct ioat_ring_ent * +ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) +{ + return ioat->ring[idx & ioat2_ring_mask(ioat)]; +} + +int ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int ioat3_dma_probe(struct ioatdma_device *dev, int dca); +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +#endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 55414d88ac1..c4e43226925 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -31,6 +31,7 @@ #include #include #include "dma.h" +#include "dma_v2.h" #include "registers.h" #include "hw.h" -- cgit v1.2.3-70-g09d2 From bf40a6869c9198bdf56fe173961feb89e9f0d961 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:55 -0700 Subject: ioat3: split ioat3 support to its own file, add memset Up until this point the driver for Intel(R) QuickData Technology engines, specification versions 2 and 3, were mostly identical save for a few quirks. Version 3.2 hardware adds many new capabilities (like raid offload support) requiring some infrastructure that is not relevant for v2. For better code organization of the new funcionality move v3 and v3.2 support to its own file dma_v3.c, and export some routines from the base files (dma.c and dma_v2.c) that can be reused directly. The first new capability included in this code reorganization is support for v3.2 memset operations. Signed-off-by: Dan Williams --- drivers/dma/ioat/Makefile | 2 +- drivers/dma/ioat/dma.c | 11 -- drivers/dma/ioat/dma.h | 16 ++ drivers/dma/ioat/dma_v2.c | 94 +++--------- drivers/dma/ioat/dma_v2.h | 13 ++ drivers/dma/ioat/dma_v3.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/dma/ioat/pci.c | 2 +- 7 files changed, 421 insertions(+), 84 deletions(-) create mode 100644 drivers/dma/ioat/dma_v3.c (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile index 205a639e84d..8997d3fb905 100644 --- a/drivers/dma/ioat/Makefile +++ b/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-objs := pci.o dma.o dma_v2.o dca.o +ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 17a518d0386..70262c0131d 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -538,17 +538,6 @@ static void ioat1_cleanup_tasklet(unsigned long data) writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); } -static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, - int direction, enum dma_ctrl_flags flags, bool dst) -{ - if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || - (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) - pci_unmap_single(pdev, addr, len, direction); - else - pci_unmap_page(pdev, addr, len, direction); -} - - void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, size_t len, struct ioat_dma_descriptor *hw) { diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 0d94e7804c1..c6d58bf541d 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -60,6 +60,10 @@ * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) * @enumerate_channels: hw version specific channel enumeration + * @cleanup_tasklet: select between the v2 and v3 cleanup routines + * @timer_fn: select between the v2 and v3 timer watchdog routines + * + * Note: the v3 cleanup routine supports raid operations */ struct ioatdma_device { @@ -74,6 +78,8 @@ struct ioatdma_device { struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); + void (*cleanup_tasklet)(unsigned long data); + void (*timer_fn)(unsigned long data); }; struct ioat_chan_common { @@ -287,6 +293,16 @@ static inline bool is_ioat_bug(unsigned long err) IOAT_CHANERR_LENGTH_ERR)); } +static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, + int direction, enum dma_ctrl_flags flags, bool dst) +{ + if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || + (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) + pci_unmap_single(pdev, addr, len, direction); + else + pci_unmap_page(pdev, addr, len, direction); +} + int __devinit ioat_probe(struct ioatdma_device *device); int __devinit ioat_register(struct ioatdma_device *device); int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 568923c5dde..7492e9165e0 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -39,7 +39,7 @@ #include "registers.h" #include "hw.h" -static int ioat_ring_alloc_order = 8; +int ioat_ring_alloc_order = 8; module_param(ioat_ring_alloc_order, int, 0644); MODULE_PARM_DESC(ioat_ring_alloc_order, "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); @@ -63,7 +63,7 @@ static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); } -static void ioat2_issue_pending(struct dma_chan *chan) +void ioat2_issue_pending(struct dma_chan *chan) { struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); @@ -214,7 +214,7 @@ static void ioat2_cleanup_tasklet(unsigned long data) writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } -static void __restart_chan(struct ioat2_dma_chan *ioat) +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) { struct ioat_chan_common *chan = &ioat->base; @@ -255,11 +255,9 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) if (ioat_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); - __restart_chan(ioat); + __ioat2_restart_chan(ioat); } -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order); - static void ioat2_timer_event(unsigned long data) { struct ioat2_dma_chan *ioat = (void *) data; @@ -321,7 +319,7 @@ static void ioat2_timer_event(unsigned long data) * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated */ -static int ioat2_enumerate_channels(struct ioatdma_device *device) +int ioat2_enumerate_channels(struct ioatdma_device *device) { struct ioat2_dma_chan *ioat; struct device *dev = &device->pdev->dev; @@ -354,8 +352,8 @@ static int ioat2_enumerate_channels(struct ioatdma_device *device) break; ioat_init_channel(device, &ioat->base, i, - ioat2_timer_event, - ioat2_cleanup_tasklet, + device->timer_fn, + device->cleanup_tasklet, (unsigned long) ioat); ioat->xfercap_log = xfercap_log; spin_lock_init(&ioat->ring_lock); @@ -460,7 +458,7 @@ static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gf /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring * @chan: channel to be initialized */ -static int ioat2_alloc_chan_resources(struct dma_chan *c) +int ioat2_alloc_chan_resources(struct dma_chan *c) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; @@ -514,7 +512,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) return 1 << ioat->alloc_order; } -static bool reshape_ring(struct ioat2_dma_chan *ioat, int order) +bool reshape_ring(struct ioat2_dma_chan *ioat, int order) { /* reshape differs from normal ring allocation in that we want * to allocate a new software ring while only @@ -627,7 +625,7 @@ static bool reshape_ring(struct ioat2_dma_chan *ioat, int order) * @ioat: ioat2,3 channel (ring) to operate on * @num_descs: allocation length */ -static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) { struct ioat_chan_common *chan = &ioat->base; @@ -655,9 +653,11 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d spin_lock_bh(&chan->cleanup_lock); if (jiffies > chan->timer.expires && timer_pending(&chan->timer)) { + struct ioatdma_device *device = chan->device; + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); spin_unlock_bh(&chan->cleanup_lock); - ioat2_timer_event((unsigned long) ioat); + device->timer_fn((unsigned long) ioat); } else spin_unlock_bh(&chan->cleanup_lock); return -ENOMEM; @@ -670,7 +670,7 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d return 0; /* with ioat->ring_lock held */ } -static struct dma_async_tx_descriptor * +struct dma_async_tx_descriptor * ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) { @@ -722,11 +722,11 @@ ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, * ioat2_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned */ -static void ioat2_free_chan_resources(struct dma_chan *c) +void ioat2_free_chan_resources(struct dma_chan *c) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *ioatdma_device = chan->device; + struct ioatdma_device *device = chan->device; struct ioat_ring_ent *desc; const u16 total_descs = 1 << ioat->alloc_order; int descs; @@ -740,7 +740,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) tasklet_disable(&chan->cleanup_task); del_timer_sync(&chan->timer); - ioat2_cleanup(ioat); + device->cleanup_tasklet((unsigned long) ioat); /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. @@ -770,8 +770,7 @@ static void ioat2_free_chan_resources(struct dma_chan *c) kfree(ioat->ring); ioat->ring = NULL; ioat->alloc_order = 0; - pci_pool_free(ioatdma_device->completion_pool, - chan->completion, + pci_pool_free(device->completion_pool, chan->completion, chan->completion_dma); spin_unlock_bh(&ioat->ring_lock); @@ -781,16 +780,17 @@ static void ioat2_free_chan_resources(struct dma_chan *c) ioat->dmacount = 0; } -static enum dma_status +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used) { struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioatdma_device *device = ioat->base.device; if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) return DMA_SUCCESS; - ioat2_cleanup(ioat); + device->cleanup_tasklet((unsigned long) ioat); return ioat_is_complete(c, cookie, done, used); } @@ -804,6 +804,8 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) int err; device->enumerate_channels = ioat2_enumerate_channels; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; dma->device_issue_pending = ioat2_issue_pending; @@ -830,53 +832,3 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) return err; } - -int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - u16 dev_id; - - device->enumerate_channels = ioat2_enumerate_channels; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_is_tx_complete = ioat2_is_complete; - - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - - err = ioat_probe(device); - if (err) - return err; - ioat_set_tcp_copy_break(262144); - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return err; -} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index ed4bb82a283..bde57ddf555 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -27,6 +27,7 @@ extern int ioat_pending_level; +extern int ioat_ring_alloc_order; /* * workaround for IOAT ver.3.0 null descriptor issue @@ -167,4 +168,16 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs); +int ioat2_enumerate_channels(struct ioatdma_device *device); +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +void ioat2_issue_pending(struct dma_chan *chan); +int ioat2_alloc_chan_resources(struct dma_chan *c); +void ioat2_free_chan_resources(struct dma_chan *c); +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used); +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); +bool reshape_ring(struct ioat2_dma_chan *ioat, int order); #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c new file mode 100644 index 00000000000..b223d66b97e --- /dev/null +++ b/drivers/dma/ioat/dma_v3.c @@ -0,0 +1,367 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * BSD LICENSE + * + * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Support routines for v3+ hardware + */ + +#include +#include +#include +#include "registers.h" +#include "hw.h" +#include "dma.h" +#include "dma_v2.h" + +static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc) +{ + struct ioat_chan_common *chan = &ioat->base; + struct pci_dev *pdev = chan->device->pdev; + size_t len = desc->len; + size_t offset = len - desc->hw->size; + struct dma_async_tx_descriptor *tx = &desc->txd; + enum dma_ctrl_flags flags = tx->flags; + + switch (desc->hw->ctl_f.op) { + case IOAT_OP_COPY: + ioat_dma_unmap(chan, flags, len, desc->hw); + break; + case IOAT_OP_FILL: { + struct ioat_fill_descriptor *hw = desc->fill; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + default: + dev_err(&pdev->dev, "%s: unknown op type: %#x\n", + __func__, desc->hw->ctl_f.op); + } +} + + +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; + + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); + tx = &desc->txd; + if (tx->cookie) { + chan->completed_cookie = tx->cookie; + ioat3_dma_unmap(ioat, desc); + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +static void ioat3_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat3_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat3_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + status = ioat_chansts(chan); + cpu_relax(); + } + + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +static void ioat3_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; + + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + BUG_ON(is_ioat_bug(chanerr)); + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat3_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +static enum dma_status +ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat3_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_fill_descriptor *fill; + int num_descs; + u64 src_data = (0x0101010101010101ULL) * (value & 0xff); + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + for (i = 0; i < num_descs; i++) { + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + fill = desc->fill; + + fill->size = xfer_size; + fill->src_data = src_data; + fill->dst_addr = dest; + fill->ctl = 0; + fill->ctl_f.op = IOAT_OP_FILL; + + len -= xfer_size; + dest += xfer_size; + dump_desc_dbg(ioat, desc); + } + + desc->txd.flags = flags; + desc->len = total_len; + fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + fill->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + u16 dev_id; + u32 cap; + + device->enumerate_channels = ioat2_enumerate_channels; + device->cleanup_tasklet = ioat3_cleanup_tasklet; + device->timer_fn = ioat3_timer_event; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat3_is_complete; + cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + if (cap & IOAT_CAP_FILL_BLOCK) { + dma_cap_set(DMA_MEMSET, dma->cap_mask); + dma->device_prep_dma_memset = ioat3_prep_memset_lock; + } + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return 0; +} diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index c4e43226925..0f3ec6e97fe 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -36,7 +36,7 @@ #include "hw.h" MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); static struct pci_device_id ioat_pci_tbl[] = { -- cgit v1.2.3-70-g09d2 From 5669e31c5a4874f1634bc0ffba268a6e2fa0cdd2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:42:56 -0700 Subject: ioat: add 'ioat' sysfs attributes Export driver attributes for diagnostic purposes: 'ring_size': total number of descriptors available to the engine 'ring_active': number of descriptors in-flight 'capabilities': supported operation types for this channel 'version': Intel(R) QuickData specfication revision This also allows some chattiness to be removed from the driver startup as this information is now available via sysfs. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 120 +++++++++++++++++++++++++++++++++++++++++++--- drivers/dma/ioat/dma.h | 12 +++++ drivers/dma/ioat/dma_v2.c | 33 +++++++++++++ drivers/dma/ioat/dma_v2.h | 1 + drivers/dma/ioat/dma_v3.c | 3 ++ drivers/dma/ioat/pci.c | 3 ++ 6 files changed, 166 insertions(+), 6 deletions(-) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 70262c0131d..cb08f810849 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -263,6 +263,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat->active += desc->hw->tx_cnt; ioat->pending += desc->hw->tx_cnt; if (ioat->pending >= ioat_pending_level) __ioat1_dma_memcpy_issue_pending(ioat); @@ -611,6 +612,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete) chan->completed_cookie = tx->cookie; tx->cookie = 0; ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + ioat->active -= desc->hw->tx_cnt; if (tx->callback) { tx->callback(tx->callback_param); tx->callback = NULL; @@ -1028,13 +1030,8 @@ int __devinit ioat_probe(struct ioatdma_device *device) dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->dev = &pdev->dev; - dev_err(dev, "Intel(R) I/OAT DMA Engine found," - " %d channels, device version 0x%02x, driver version %s\n", - dma->chancnt, device->version, IOAT_DMA_VERSION); - if (!dma->chancnt) { - dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: " - "zero channels detected\n"); + dev_err(dev, "zero channels detected\n"); goto err_setup_interrupts; } @@ -1085,6 +1082,113 @@ static void ioat1_intr_quirk(struct ioatdma_device *device) pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); } +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->desccount); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->active); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *device = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + device->version >> 4, device->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static struct attribute *ioat1_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioat_chan_common *chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + chan = container_of(kobj, struct ioat_chan_common, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&chan->common, page); +} + +struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +static struct kobj_type ioat1_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat1_attrs, +}; + +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (err) { + dev_warn(to_dev(chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { + kobject_del(&chan->kobj); + kobject_put(&chan->kobj); + } + } +} + int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1107,6 +1211,8 @@ int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) err = ioat_register(device); if (err) return err; + ioat_kobject_add(device, &ioat1_ktype); + if (dca) device->dca = ioat_dca_init(pdev, device->reg_base); @@ -1119,6 +1225,8 @@ void __devexit ioat_dma_remove(struct ioatdma_device *device) ioat_disable_interrupts(device); + ioat_kobject_del(device); + dma_async_device_unregister(dma); pci_pool_destroy(device->dma_pool); diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index c6d58bf541d..c2939b28918 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -92,6 +92,7 @@ struct ioat_chan_common { #define IOAT_COMPLETION_PENDING 0 #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 + #define IOAT_KOBJ_INIT_FAIL 3 struct timer_list timer; #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) @@ -100,8 +101,13 @@ struct ioat_chan_common { dma_addr_t completion_dma; u64 *completion; struct tasklet_struct cleanup_task; + struct kobject kobj; }; +struct ioat_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct dma_chan *, char *); +}; /** * struct ioat_dma_chan - internal representation of a DMA channel @@ -117,6 +123,7 @@ struct ioat_dma_chan { int pending; u16 desccount; + u16 active; }; static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) @@ -319,4 +326,9 @@ void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, size_t len, struct ioat_dma_descriptor *hw); bool ioat_cleanup_preamble(struct ioat_chan_common *chan, unsigned long *phys_complete); +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *device); +extern struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 7492e9165e0..80ce32de8d3 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -795,6 +795,36 @@ ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, return ioat_is_complete(c, cookie, done, used); } +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat2_ring_active(ioat)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat2_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat2_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat2_attrs, +}; + int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -827,6 +857,9 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) err = ioat_register(device); if (err) return err; + + ioat_kobject_add(device, &ioat2_ktype); + if (dca) device->dca = ioat2_dca_init(pdev, device->reg_base); diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index bde57ddf555..fa030f8e1f2 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -180,4 +180,5 @@ enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used); void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); bool reshape_ring(struct ioat2_dma_chan *ioat, int order); +extern struct kobj_type ioat2_ktype; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index b223d66b97e..22af78ec257 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -360,6 +360,9 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) err = ioat_register(device); if (err) return err; + + ioat_kobject_add(device, &ioat2_ktype); + if (dca) device->dca = ioat3_dca_init(pdev, device->reg_base); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 0f3ec6e97fe..626a508a4f8 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -168,6 +168,9 @@ static void __devexit ioat_remove(struct pci_dev *pdev) static int __init ioat_init_module(void) { + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + return pci_register_driver(&ioat_pci_driver); } module_init(ioat_init_module); -- cgit v1.2.3-70-g09d2 From b265b11fc1a0bd6ae5a7fde12e374583a52ab326 Mon Sep 17 00:00:00 2001 From: Tom Picard Date: Tue, 8 Sep 2009 17:43:01 -0700 Subject: ioat3: ioat3.2 pci ids for Jasper Forest Jasper Forest introduces raid offload support via ioat3.2 support. When raid offload is enabled two (out of 8 channels) will report raid5/raid6 offload capabilities. The remaining channels will only report ioat3.0 capabilities (memcpy). Signed-off-by: Tom Picard Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 13 +++++++++++++ include/linux/pci_ids.h | 10 ++++++++++ 2 files changed, 23 insertions(+) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 626a508a4f8..6c1aac5b359 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -58,6 +58,19 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + { 0, } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f71812d67d..2b4b8ce5325 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2529,6 +2529,16 @@ #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 -- cgit v1.2.3-70-g09d2 From 6506cbca6b5b36d682bd39afcbf3f575c81dddb6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 8 Sep 2009 17:43:03 -0700 Subject: Add MODULE_DEVICE_TABLE() so ioatdma module is autoloaded The ioatdma module is missing aliases for the PCI devices it supports, so it is not autoloaded on boot. Add a MODULE_DEVICE_TABLE() to get these aliases. Signed-off-by: Roland Dreier Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 6c1aac5b359..3b2f40e9b3b 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -73,6 +73,7 @@ static struct pci_device_id ioat_pci_tbl[] = { { 0, } }; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); -- cgit v1.2.3-70-g09d2 From a6417dd58d6832f123f36c6f22c63ec1ab62ce1c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 8 Sep 2009 17:43:03 -0700 Subject: I/OAT: Convert to PCI_VDEVICE() Trivial cleanup to make the PCI ID table easier to read. [dan.j.williams@intel.com: extended to v3.2 devices] Signed-off-by: Roland Dreier Signed-off-by: Dan Williams --- drivers/dma/ioat/pci.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 3b2f40e9b3b..b77d3a2864a 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -41,35 +41,35 @@ MODULE_AUTHOR("Intel Corporation"); static struct pci_device_id ioat_pci_tbl[] = { /* I/OAT v1 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, /* I/OAT v2 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, /* I/OAT v3 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, /* I/OAT v3.2 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, { 0, } }; -- cgit v1.2.3-70-g09d2 From 162b96e63e518aa6ff029ce23de12d7f027483bf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Sep 2009 17:53:04 -0700 Subject: ioat2,3: cacheline align software descriptor allocations All the necessary fields for handling an ioat2,3 ring entry can fit into one cacheline. Move ->len prior to ->txd in struct ioat_ring_ent, and move allocation of these entries to a hw-cache-aligned kmem cache to reduce the number of cachelines dirtied for descriptor management. Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v2.c | 5 +++-- drivers/dma/ioat/dma_v2.h | 3 ++- drivers/dma/ioat/pci.c | 16 +++++++++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 460b7730133..fa3d6db6624 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -399,11 +399,12 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t f return NULL; memset(hw, 0, sizeof(*hw)); - desc = kzalloc(sizeof(*desc), flags); + desc = kmem_cache_alloc(ioat2_cache, flags); if (!desc) { pci_pool_free(dma->dma_pool, hw, phys); return NULL; } + memset(desc, 0, sizeof(*desc)); dma_async_tx_descriptor_init(&desc->txd, chan); desc->txd.tx_submit = ioat2_tx_submit_unlock; @@ -418,7 +419,7 @@ static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *cha dma = to_ioatdma_device(chan->device); pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); - kfree(desc); + kmem_cache_free(ioat2_cache, desc); } static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 9baa3d6065f..ac00adc8197 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -116,8 +116,8 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len struct ioat_ring_ent { struct ioat_dma_descriptor *hw; - struct dma_async_tx_descriptor txd; size_t len; + struct dma_async_tx_descriptor txd; #ifdef DEBUG int id; #endif @@ -143,4 +143,5 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +extern struct kmem_cache *ioat2_cache; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index c4e43226925..61086c6bbf4 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -69,6 +69,8 @@ static int ioat_dca_enabled = 1; module_param(ioat_dca_enabled, int, 0644); MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +struct kmem_cache *ioat2_cache; + #define DRV_NAME "ioatdma" static struct pci_driver ioat_pci_driver = { @@ -168,12 +170,24 @@ static void __devexit ioat_remove(struct pci_dev *pdev) static int __init ioat_init_module(void) { - return pci_register_driver(&ioat_pci_driver); + int err; + + ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat2_cache) + return -ENOMEM; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + kmem_cache_destroy(ioat2_cache); + + return err; } module_init(ioat_init_module); static void __exit ioat_exit_module(void) { pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat2_cache); } module_exit(ioat_exit_module); -- cgit v1.2.3-70-g09d2 From 1a5aeeecd550ee4344cfba1791f1134739b16dc6 Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Thu, 10 Sep 2009 15:05:58 +0200 Subject: dca: registering requesters in multiple dca domains This patch enables DCA support on multiple-IOH/multiple-IIO architectures. It modifies dca module by replacing single dca_providers list with dca_domains list, each domain containing separate list of providers. This approach lets dca driver manage multiple domains, i.e. sets of providers and requesters mapped back to the same PCI root complex device. The driver takes care to register each requester to a provider from the same domain. Signed-off-by: Dan Williams Signed-off-by: Maciej Sosnowski --- drivers/dca/dca-core.c | 122 +++++++++++++++++++++++++++++++++++++++++++------ drivers/dma/ioat/pci.c | 2 +- include/linux/dca.h | 11 ++++- 3 files changed, 120 insertions(+), 15 deletions(-) (limited to 'drivers/dma/ioat/pci.c') diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index 25b743abfb5..7e318de0904 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -28,7 +28,7 @@ #include #include -#define DCA_VERSION "1.8" +#define DCA_VERSION "1.12.1" MODULE_VERSION(DCA_VERSION); MODULE_LICENSE("GPL"); @@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation"); static DEFINE_SPINLOCK(dca_lock); -static LIST_HEAD(dca_providers); +static LIST_HEAD(dca_domains); -static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +static struct pci_bus *dca_pci_rc_from_dev(struct device *dev) { - struct dca_provider *dca, *ret = NULL; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; - list_for_each_entry(dca, &dca_providers, node) { - if ((!dev) || (dca->ops->dev_managed(dca, dev))) { - ret = dca; - break; - } + while (bus->parent) + bus = bus->parent; + + return bus; +} + +static struct dca_domain *dca_allocate_domain(struct pci_bus *rc) +{ + struct dca_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_NOWAIT); + if (!domain) + return NULL; + + INIT_LIST_HEAD(&domain->dca_providers); + domain->pci_rc = rc; + + return domain; +} + +static void dca_free_domain(struct dca_domain *domain) +{ + list_del(&domain->node); + kfree(domain); +} + +static struct dca_domain *dca_find_domain(struct pci_bus *rc) +{ + struct dca_domain *domain; + + list_for_each_entry(domain, &dca_domains, node) + if (domain->pci_rc == rc) + return domain; + + return NULL; +} + +static struct dca_domain *dca_get_domain(struct device *dev) +{ + struct pci_bus *rc; + struct dca_domain *domain; + + rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(rc); + + if (!domain) { + domain = dca_allocate_domain(rc); + if (domain) + list_add(&domain->node, &dca_domains); + } + + return domain; +} + +static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +{ + struct dca_provider *dca; + struct pci_bus *rc; + struct dca_domain *domain; + + if (dev) { + rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(rc); + if (!domain) + return NULL; + } else { + if (!list_empty(&dca_domains)) + domain = list_first_entry(&dca_domains, + struct dca_domain, + node); + else + return NULL; } - return ret; + list_for_each_entry(dca, &domain->dca_providers, node) + if ((!dev) || (dca->ops->dev_managed(dca, dev))) + return dca; + + return NULL; } /** @@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev) struct dca_provider *dca; int err, slot = -ENODEV; unsigned long flags; + struct pci_bus *pci_rc; + struct dca_domain *domain; if (!dev) return -EFAULT; @@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev) return -EEXIST; } - list_for_each_entry(dca, &dca_providers, node) { + pci_rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(pci_rc); + if (!domain) { + spin_unlock_irqrestore(&dca_lock, flags); + return -ENODEV; + } + + list_for_each_entry(dca, &domain->dca_providers, node) { slot = dca->ops->add_requester(dca, dev); if (slot >= 0) break; @@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) { int err; unsigned long flags; + struct dca_domain *domain; err = dca_sysfs_add_provider(dca, dev); if (err) return err; spin_lock_irqsave(&dca_lock, flags); - list_add(&dca->node, &dca_providers); + domain = dca_get_domain(dev); + if (!domain) { + spin_unlock_irqrestore(&dca_lock, flags); + return -ENODEV; + } + list_add(&dca->node, &domain->dca_providers); spin_unlock_irqrestore(&dca_lock, flags); blocking_notifier_call_chain(&dca_provider_chain, @@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider); * unregister_dca_provider - remove a dca provider * @dca - struct created by alloc_dca_provider() */ -void unregister_dca_provider(struct dca_provider *dca) +void unregister_dca_provider(struct dca_provider *dca, struct device *dev) { unsigned long flags; + struct pci_bus *pci_rc; + struct dca_domain *domain; blocking_notifier_call_chain(&dca_provider_chain, DCA_PROVIDER_REMOVE, NULL); spin_lock_irqsave(&dca_lock, flags); + list_del(&dca->node); + + pci_rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(pci_rc); + if (list_empty(&domain->dca_providers)) + dca_free_domain(domain); + spin_unlock_irqrestore(&dca_lock, flags); dca_sysfs_remove_provider(dca); diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index c788fa26647..d545fae30f3 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -175,7 +175,7 @@ static void __devexit ioat_remove(struct pci_dev *pdev) dev_err(&pdev->dev, "Removing dma and dca services\n"); if (device->dca) { - unregister_dca_provider(device->dca); + unregister_dca_provider(device->dca, &pdev->dev); free_dca_provider(device->dca); device->dca = NULL; } diff --git a/include/linux/dca.h b/include/linux/dca.h index 9c20c7e87d0..d27a7a05718 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h @@ -20,6 +20,9 @@ */ #ifndef DCA_H #define DCA_H + +#include + /* DCA Provider API */ /* DCA Notifier Interface */ @@ -36,6 +39,12 @@ struct dca_provider { int id; }; +struct dca_domain { + struct list_head node; + struct list_head dca_providers; + struct pci_bus *pci_rc; +}; + struct dca_ops { int (*add_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *); @@ -47,7 +56,7 @@ struct dca_ops { struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); void free_dca_provider(struct dca_provider *dca); int register_dca_provider(struct dca_provider *dca, struct device *dev); -void unregister_dca_provider(struct dca_provider *dca); +void unregister_dca_provider(struct dca_provider *dca, struct device *dev); static inline void *dca_priv(struct dca_provider *dca) { -- cgit v1.2.3-70-g09d2