From 653e67f7e5e008258ff65c2067460cc9e8ee8f94 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Jul 2014 22:47:26 +0200 Subject: dmaengine: inherit debug settings from the subsystem for subdirectories To be able to see debug messages during boot, enable the debug settings from Kconfig also for drivers in subdirectories. Signed-off-by: Wolfram Sang Signed-off-by: Vinod Koul --- drivers/dma/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/dma/Makefile') diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index c779e1eb2db..aca5eb577d4 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,5 +1,5 @@ -ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG -ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG +subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG +subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o -- cgit v1.2.3-70-g09d2 From 555859308723d8d5b828304f5eb9281143fd86b5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 17 Jul 2014 21:46:16 +0200 Subject: dmaengine: sun6i: Add driver for the Allwinner A31 DMA controller The Allwinner A31 has a 16 channels DMA controller that it shares with the newer A23. Although sharing some similarities with the DMA controller of the older Allwinner SoCs, it's significantly different, I don't expect it to be possible to share the driver for these two. The A31 Controller is able to memory-to-memory or memory-to-device transfers on the 16 channels in parallel. 
Signed-off-by: Maxime Ripard Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 8 + drivers/dma/Makefile | 1 + drivers/dma/sun6i-dma.c | 1059 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1068 insertions(+) create mode 100644 drivers/dma/sun6i-dma.c (limited to 'drivers/dma/Makefile') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 1eca7b9760e..4b439270fb1 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -375,6 +375,14 @@ config XILINX_VDMA channels, Memory Mapped to Stream (MM2S) and Stream to Memory Mapped (S2MM) for the data transfers. +config DMA_SUN6I + tristate "Allwinner A31 SoCs DMA support" + depends on MACH_SUN6I || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the DMA engine for Allwinner A31 SoCs. + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index aca5eb577d4..d08bd966da1 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -47,3 +47,4 @@ obj-$(CONFIG_MOXART_DMA) += moxart-dma.o obj-$(CONFIG_FSL_EDMA) += fsl-edma.o obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o obj-y += xilinx/ +obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c new file mode 100644 index 00000000000..ce8d5d1b0ff --- /dev/null +++ b/drivers/dma/sun6i-dma.c @@ -0,0 +1,1059 @@ +/* + * Copyright (C) 2013-2014 Allwinner Tech Co., Ltd + * Author: Sugar + * + * Copyright (C) 2014 Maxime Ripard + * Maxime Ripard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virt-dma.h" + +/* + * There's 16 physical channels that can work in parallel. 
+ * + * However we have 30 different endpoints for our requests. + * + * Since the channels are able to handle only an unidirectional + * transfer, we need to allocate more virtual channels so that + * everyone can grab one channel. + * + * Some devices can't work in both direction (mostly because it + * wouldn't make sense), so we have a bit fewer virtual channels than + * 2 channels per endpoints. + */ + +#define NR_MAX_CHANNELS 16 +#define NR_MAX_REQUESTS 30 +#define NR_MAX_VCHANS 53 + +/* + * Common registers + */ +#define DMA_IRQ_EN(x) ((x) * 0x04) +#define DMA_IRQ_HALF BIT(0) +#define DMA_IRQ_PKG BIT(1) +#define DMA_IRQ_QUEUE BIT(2) + +#define DMA_IRQ_CHAN_NR 8 +#define DMA_IRQ_CHAN_WIDTH 4 + + +#define DMA_IRQ_STAT(x) ((x) * 0x04 + 0x10) + +#define DMA_STAT 0x30 + +/* + * Channels specific registers + */ +#define DMA_CHAN_ENABLE 0x00 +#define DMA_CHAN_ENABLE_START BIT(0) +#define DMA_CHAN_ENABLE_STOP 0 + +#define DMA_CHAN_PAUSE 0x04 +#define DMA_CHAN_PAUSE_PAUSE BIT(1) +#define DMA_CHAN_PAUSE_RESUME 0 + +#define DMA_CHAN_LLI_ADDR 0x08 + +#define DMA_CHAN_CUR_CFG 0x0c +#define DMA_CHAN_CFG_SRC_DRQ(x) ((x) & 0x1f) +#define DMA_CHAN_CFG_SRC_IO_MODE BIT(5) +#define DMA_CHAN_CFG_SRC_LINEAR_MODE (0 << 5) +#define DMA_CHAN_CFG_SRC_BURST(x) (((x) & 0x3) << 7) +#define DMA_CHAN_CFG_SRC_WIDTH(x) (((x) & 0x3) << 9) + +#define DMA_CHAN_CFG_DST_DRQ(x) (DMA_CHAN_CFG_SRC_DRQ(x) << 16) +#define DMA_CHAN_CFG_DST_IO_MODE (DMA_CHAN_CFG_SRC_IO_MODE << 16) +#define DMA_CHAN_CFG_DST_LINEAR_MODE (DMA_CHAN_CFG_SRC_LINEAR_MODE << 16) +#define DMA_CHAN_CFG_DST_BURST(x) (DMA_CHAN_CFG_SRC_BURST(x) << 16) +#define DMA_CHAN_CFG_DST_WIDTH(x) (DMA_CHAN_CFG_SRC_WIDTH(x) << 16) + +#define DMA_CHAN_CUR_SRC 0x10 + +#define DMA_CHAN_CUR_DST 0x14 + +#define DMA_CHAN_CUR_CNT 0x18 + +#define DMA_CHAN_CUR_PARA 0x1c + + +/* + * Various hardware related defines + */ +#define LLI_LAST_ITEM 0xfffff800 +#define NORMAL_WAIT 8 +#define DRQ_SDRAM 1 + +/* + * Hardware representation of the LLI + * + * The 
hardware will be fed the physical address of this structure, + * and read its content in order to start the transfer. + */ +struct sun6i_dma_lli { + u32 cfg; + u32 src; + u32 dst; + u32 len; + u32 para; + u32 p_lli_next; + + /* + * This field is not used by the DMA controller, but will be + * used by the CPU to go through the list (mostly for dumping + * or freeing it). + */ + struct sun6i_dma_lli *v_lli_next; +}; + + +struct sun6i_desc { + struct virt_dma_desc vd; + dma_addr_t p_lli; + struct sun6i_dma_lli *v_lli; +}; + +struct sun6i_pchan { + u32 idx; + void __iomem *base; + struct sun6i_vchan *vchan; + struct sun6i_desc *desc; + struct sun6i_desc *done; +}; + +struct sun6i_vchan { + struct virt_dma_chan vc; + struct list_head node; + struct dma_slave_config cfg; + struct sun6i_pchan *phy; + u8 port; +}; + +struct sun6i_dma_dev { + struct dma_device slave; + void __iomem *base; + struct clk *clk; + int irq; + spinlock_t lock; + struct reset_control *rstc; + struct tasklet_struct task; + atomic_t tasklet_shutdown; + struct list_head pending; + struct dma_pool *pool; + struct sun6i_pchan *pchans; + struct sun6i_vchan *vchans; +}; + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct sun6i_dma_dev *to_sun6i_dma_dev(struct dma_device *d) +{ + return container_of(d, struct sun6i_dma_dev, slave); +} + +static inline struct sun6i_vchan *to_sun6i_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun6i_vchan, vc.chan); +} + +static inline struct sun6i_desc * +to_sun6i_desc(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct sun6i_desc, vd.tx); +} + +static inline void sun6i_dma_dump_com_regs(struct sun6i_dma_dev *sdev) +{ + dev_dbg(sdev->slave.dev, "Common register:\n" + "\tmask0(%04x): 0x%08x\n" + "\tmask1(%04x): 0x%08x\n" + "\tpend0(%04x): 0x%08x\n" + "\tpend1(%04x): 0x%08x\n" + "\tstats(%04x): 0x%08x\n", + DMA_IRQ_EN(0), readl(sdev->base + DMA_IRQ_EN(0)), + DMA_IRQ_EN(1), 
readl(sdev->base + DMA_IRQ_EN(1)), + DMA_IRQ_STAT(0), readl(sdev->base + DMA_IRQ_STAT(0)), + DMA_IRQ_STAT(1), readl(sdev->base + DMA_IRQ_STAT(1)), + DMA_STAT, readl(sdev->base + DMA_STAT)); +} + +/* + * Dump the registers of one physical channel through dev_dbg(). + * + * %pa dereferences its argument, so the phys_addr_t has to be passed by + * address (&reg), not by value. + */ +static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev, + struct sun6i_pchan *pchan) +{ + phys_addr_t reg = __virt_to_phys((unsigned long)pchan->base); + + dev_dbg(sdev->slave.dev, "Chan %d reg: %pa\n" + "\t___en(%04x): \t0x%08x\n" + "\tpause(%04x): \t0x%08x\n" + "\tstart(%04x): \t0x%08x\n" + "\t__cfg(%04x): \t0x%08x\n" + "\t__src(%04x): \t0x%08x\n" + "\t__dst(%04x): \t0x%08x\n" + "\tcount(%04x): \t0x%08x\n" + "\t_para(%04x): \t0x%08x\n\n", + pchan->idx, &reg, + DMA_CHAN_ENABLE, + readl(pchan->base + DMA_CHAN_ENABLE), + DMA_CHAN_PAUSE, + readl(pchan->base + DMA_CHAN_PAUSE), + DMA_CHAN_LLI_ADDR, + readl(pchan->base + DMA_CHAN_LLI_ADDR), + DMA_CHAN_CUR_CFG, + readl(pchan->base + DMA_CHAN_CUR_CFG), + DMA_CHAN_CUR_SRC, + readl(pchan->base + DMA_CHAN_CUR_SRC), + DMA_CHAN_CUR_DST, + readl(pchan->base + DMA_CHAN_CUR_DST), + DMA_CHAN_CUR_CNT, + readl(pchan->base + DMA_CHAN_CUR_CNT), + DMA_CHAN_CUR_PARA, + readl(pchan->base + DMA_CHAN_CUR_PARA)); +} + +static inline int convert_burst(u32 maxburst, u8 *burst) +{ + switch (maxburst) { + case 1: + *burst = 0; + break; + case 8: + *burst = 2; + break; + default: + return -EINVAL; + } + + return 0; +} + +static inline int convert_buswidth(enum dma_slave_buswidth addr_width, u8 *width) +{ + switch (addr_width) { + case DMA_SLAVE_BUSWIDTH_1_BYTE: + *width = 0; + break; + case DMA_SLAVE_BUSWIDTH_2_BYTES: + *width = 1; + break; + case DMA_SLAVE_BUSWIDTH_4_BYTES: + *width = 2; + break; + default: + return -EINVAL; + } + + return 0; +} + +static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, + struct sun6i_dma_lli *next, + dma_addr_t next_phy, + struct sun6i_desc *txd) +{ + if ((!prev && !txd) || !next) + return NULL; + + if (!prev) { + txd->p_lli = next_phy; + txd->v_lli = next; + } else { + prev->p_lli_next = next_phy; +
prev->v_lli_next = next; + } + + next->p_lli_next = LLI_LAST_ITEM; + next->v_lli_next = NULL; + + return next; +} + +static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli, + dma_addr_t src, + dma_addr_t dst, u32 len, + struct dma_slave_config *config) +{ + u8 src_width, dst_width, src_burst, dst_burst; + int ret; + + if (!config) + return -EINVAL; + + ret = convert_burst(config->src_maxburst, &src_burst); + if (ret) + return ret; + + ret = convert_burst(config->dst_maxburst, &dst_burst); + if (ret) + return ret; + + ret = convert_buswidth(config->src_addr_width, &src_width); + if (ret) + return ret; + + ret = convert_buswidth(config->dst_addr_width, &dst_width); + if (ret) + return ret; + + lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | + DMA_CHAN_CFG_SRC_WIDTH(src_width) | + DMA_CHAN_CFG_DST_BURST(dst_burst) | + DMA_CHAN_CFG_DST_WIDTH(dst_width); + + lli->src = src; + lli->dst = dst; + lli->len = len; + lli->para = NORMAL_WAIT; + + return 0; +} + +static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan, + struct sun6i_dma_lli *lli) +{ + phys_addr_t p_lli = __virt_to_phys((unsigned long)lli); + + dev_dbg(chan2dev(&vchan->vc.chan), + "\n\tdesc: p - %pa v - 0x%p\n" + "\t\tc - 0x%08x s - 0x%08x d - 0x%08x\n" + "\t\tl - 0x%08x p - 0x%08x n - 0x%08x\n", + &p_lli, lli, + lli->cfg, lli->src, lli->dst, + lli->len, lli->para, lli->p_lli_next); +} + +static void sun6i_dma_free_desc(struct virt_dma_desc *vd) +{ + struct sun6i_desc *txd = to_sun6i_desc(&vd->tx); + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device); + struct sun6i_dma_lli *v_lli, *v_next; + dma_addr_t p_lli, p_next; + + if (unlikely(!txd)) + return; + + p_lli = txd->p_lli; + v_lli = txd->v_lli; + + while (v_lli) { + v_next = v_lli->v_lli_next; + p_next = v_lli->p_lli_next; + + dma_pool_free(sdev->pool, v_lli, p_lli); + + v_lli = v_next; + p_lli = p_next; + } + + kfree(txd); +} + +static int sun6i_dma_terminate_all(struct sun6i_vchan *vchan) +{ + struct sun6i_dma_dev *sdev = 
to_sun6i_dma_dev(vchan->vc.chan.device); + struct sun6i_pchan *pchan = vchan->phy; + unsigned long flags; + LIST_HEAD(head); + + spin_lock(&sdev->lock); + list_del_init(&vchan->node); + spin_unlock(&sdev->lock); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + vchan_get_all_descriptors(&vchan->vc, &head); + + if (pchan) { + writel(DMA_CHAN_ENABLE_STOP, pchan->base + DMA_CHAN_ENABLE); + writel(DMA_CHAN_PAUSE_RESUME, pchan->base + DMA_CHAN_PAUSE); + + vchan->phy = NULL; + pchan->vchan = NULL; + pchan->desc = NULL; + pchan->done = NULL; + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + vchan_dma_desc_free_list(&vchan->vc, &head); + + return 0; +} + +/* + * Start the next descriptor queued on @vchan on its physical channel. + * + * Returns -EAGAIN when @vchan has no physical channel, or when + * vchan_next_desc() yields no descriptor (the pchan's desc/done pointers + * are cleared in that case), and 0 once the channel has been started. + */ +static int sun6i_dma_start_desc(struct sun6i_vchan *vchan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device); + struct virt_dma_desc *desc = vchan_next_desc(&vchan->vc); + struct sun6i_pchan *pchan = vchan->phy; + u32 irq_val, irq_reg, irq_offset; + + if (!pchan) + return -EAGAIN; + + if (!desc) { + pchan->desc = NULL; + pchan->done = NULL; + return -EAGAIN; + } + + list_del(&desc->node); + + pchan->desc = to_sun6i_desc(&desc->tx); + pchan->done = NULL; + + sun6i_dma_dump_lli(vchan, pchan->desc->v_lli); + + /* + * Each enable register serves DMA_IRQ_CHAN_NR channels: irq_reg + * selects the register, irq_offset the channel's field inside it. + */ + irq_reg = pchan->idx / DMA_IRQ_CHAN_NR; + irq_offset = pchan->idx % DMA_IRQ_CHAN_NR; + + irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg)); + irq_val |= DMA_IRQ_QUEUE << (irq_offset * DMA_IRQ_CHAN_WIDTH); + writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg)); + + writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR); + writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE); + + sun6i_dma_dump_com_regs(sdev); + sun6i_dma_dump_chan_regs(sdev, pchan); + + return 0; +} + +static void sun6i_dma_tasklet(unsigned long data) +{ + struct sun6i_dma_dev *sdev = (struct sun6i_dma_dev *)data; + struct sun6i_vchan *vchan; + struct sun6i_pchan *pchan; + unsigned int pchan_alloc = 0; + unsigned int pchan_idx; + + list_for_each_entry(vchan, &sdev->slave.channels, vc.chan.device_node) {
+ spin_lock_irq(&vchan->vc.lock); + + pchan = vchan->phy; + + if (pchan && pchan->done) { + if (sun6i_dma_start_desc(vchan)) { + /* + * No current txd associated with this channel + */ + dev_dbg(sdev->slave.dev, "pchan %u: free\n", + pchan->idx); + + /* Mark this channel free */ + vchan->phy = NULL; + pchan->vchan = NULL; + } + } + spin_unlock_irq(&vchan->vc.lock); + } + + spin_lock_irq(&sdev->lock); + for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) { + pchan = &sdev->pchans[pchan_idx]; + + if (pchan->vchan || list_empty(&sdev->pending)) + continue; + + vchan = list_first_entry(&sdev->pending, + struct sun6i_vchan, node); + + /* Remove from pending channels */ + list_del_init(&vchan->node); + pchan_alloc |= BIT(pchan_idx); + + /* Mark this channel allocated */ + pchan->vchan = vchan; + vchan->phy = pchan; + dev_dbg(sdev->slave.dev, "pchan %u: alloc vchan %p\n", + pchan->idx, &vchan->vc); + } + spin_unlock_irq(&sdev->lock); + + for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) { + if (!(pchan_alloc & BIT(pchan_idx))) + continue; + + pchan = sdev->pchans + pchan_idx; + vchan = pchan->vchan; + if (vchan) { + spin_lock_irq(&vchan->vc.lock); + sun6i_dma_start_desc(vchan); + spin_unlock_irq(&vchan->vc.lock); + } + } +} + +static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) +{ + struct sun6i_dma_dev *sdev = dev_id; + struct sun6i_vchan *vchan; + struct sun6i_pchan *pchan; + int i, j, ret = IRQ_NONE; + u32 status; + + for (i = 0; i < 2; i++) { + status = readl(sdev->base + DMA_IRQ_STAT(i)); + if (!status) + continue; + + dev_dbg(sdev->slave.dev, "DMA irq status %s: 0x%x\n", + i ? 
"high" : "low", status); + + writel(status, sdev->base + DMA_IRQ_STAT(i)); + + for (j = 0; (j < 8) && status; j++) { + if (status & DMA_IRQ_QUEUE) { + pchan = sdev->pchans + j; + vchan = pchan->vchan; + + if (vchan) { + spin_lock(&vchan->vc.lock); + vchan_cookie_complete(&pchan->desc->vd); + pchan->done = pchan->desc; + spin_unlock(&vchan->vc.lock); + } + } + + status = status >> 4; + } + + if (!atomic_read(&sdev->tasklet_shutdown)) + tasklet_schedule(&sdev->task); + ret = IRQ_HANDLED; + } + + return ret; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli; + struct sun6i_desc *txd; + dma_addr_t p_lli; + int ret; + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %zu. 
flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, &dest, &src, len, flags); + + if (!len) + return NULL; + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli); + if (!v_lli) { + dev_err(sdev->slave.dev, "Failed to alloc lli memory\n"); + kfree(txd); + return NULL; + } + + ret = sun6i_dma_cfg_lli(v_lli, src, dest, len, sconfig); + if (ret) + goto err_dma_free; + + v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_LINEAR_MODE | + DMA_CHAN_CFG_SRC_LINEAR_MODE; + + sun6i_dma_lli_add(NULL, v_lli, p_lli, txd); + + sun6i_dma_dump_lli(vchan, v_lli); + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_dma_free: + dma_pool_free(sdev->pool, v_lli, p_lli); + /* txd was never handed to vchan_tx_prep(), so it is still ours to free */ + kfree(txd); + return NULL; +} + +/* + * Build a slave transfer descriptor with one LLI per scatterlist entry. + * + * For DMA_MEM_TO_DEV the scatterlist entry is the source and + * sconfig->dst_addr the (IO mode) destination, using DRQ vchan->port; for + * the other slave direction the roles are swapped around + * sconfig->src_addr. + * + * Returns the descriptor produced by vchan_tx_prep(), or NULL on a NULL + * scatterlist, a non-slave direction, an allocation failure or a + * configuration rejected by sun6i_dma_cfg_lli(). On every failure path + * the LLIs allocated so far and the descriptor itself are released. + */ +static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli, *prev = NULL; + struct sun6i_desc *txd; + struct scatterlist *sg; + dma_addr_t p_lli, p_next; + int i, ret; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli); + if (!v_lli) + goto err_lli_free; + + if (dir == DMA_MEM_TO_DEV) { + ret = sun6i_dma_cfg_lli(v_lli, sg_dma_address(sg), + sconfig->dst_addr, sg_dma_len(sg), + sconfig); + if (ret) + goto err_dma_free; + + v_lli->cfg |= DMA_CHAN_CFG_DST_IO_MODE | + DMA_CHAN_CFG_SRC_LINEAR_MODE | + DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_DRQ(vchan->port);
+ + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, + &sconfig->dst_addr, &sg_dma_address(sg), + sg_dma_len(sg), flags); + + } else { + ret = sun6i_dma_cfg_lli(v_lli, sconfig->src_addr, + sg_dma_address(sg), sg_dma_len(sg), + sconfig); + if (ret) + goto err_dma_free; + + v_lli->cfg |= DMA_CHAN_CFG_DST_LINEAR_MODE | + DMA_CHAN_CFG_SRC_IO_MODE | + DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_SRC_DRQ(vchan->port); + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, + &sg_dma_address(sg), &sconfig->src_addr, + sg_dma_len(sg), flags); + } + + prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd); + } + + dev_dbg(chan2dev(chan), "First: %pad\n", &txd->p_lli); + for (prev = txd->v_lli; prev; prev = prev->v_lli_next) + sun6i_dma_dump_lli(vchan, prev); + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_dma_free: + /* The current item is not linked into txd yet: free it on its own */ + dma_pool_free(sdev->pool, v_lli, p_lli); +err_lli_free: + /* + * Release every item already chained to txd, reading the next + * pointers before the item goes back to the pool, then txd itself. + */ + v_lli = txd->v_lli; + p_lli = txd->p_lli; + while (v_lli) { + prev = v_lli->v_lli_next; + p_next = v_lli->p_lli_next; + + dma_pool_free(sdev->pool, v_lli, p_lli); + + v_lli = prev; + p_lli = p_next; + } + + kfree(txd); + return NULL; +} + +static int sun6i_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + unsigned long flags; + int ret = 0; + + switch (cmd) { + case DMA_RESUME: + dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (pchan) { + writel(DMA_CHAN_PAUSE_RESUME, + pchan->base + DMA_CHAN_PAUSE); + } else if (!list_empty(&vchan->vc.desc_issued)) { + spin_lock(&sdev->lock); + list_add_tail(&vchan->node, &sdev->pending); + spin_unlock(&sdev->lock); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + break; + + case DMA_PAUSE: + dev_dbg(chan2dev(chan), "vchan %p: pause\n", &vchan->vc); + + if (pchan) { + writel(DMA_CHAN_PAUSE_PAUSE, + pchan->base + DMA_CHAN_PAUSE); + } else { + spin_lock(&sdev->lock); +
list_del_init(&vchan->node); + spin_unlock(&sdev->lock); + } + break; + + case DMA_TERMINATE_ALL: + ret = sun6i_dma_terminate_all(vchan); + break; + case DMA_SLAVE_CONFIG: + memcpy(&vchan->cfg, (void *)arg, sizeof(struct dma_slave_config)); + break; + default: + ret = -ENXIO; + break; + } + return ret; +} + +static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + struct sun6i_dma_lli *lli; + struct virt_dma_desc *vd; + struct sun6i_desc *txd; + enum dma_status ret; + unsigned long flags; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + vd = vchan_find_desc(&vchan->vc, cookie); + txd = to_sun6i_desc(&vd->tx); + + if (vd) { + for (lli = txd->v_lli; lli != NULL; lli = lli->v_lli_next) + bytes += lli->len; + } else if (!pchan || !pchan->desc) { + bytes = 0; + } else { + bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + dma_set_residue(state, bytes); + + return ret; +} + +static void sun6i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (vchan_issue_pending(&vchan->vc)) { + spin_lock(&sdev->lock); + + if (!vchan->phy && list_empty(&vchan->node)) { + list_add_tail(&vchan->node, &sdev->pending); + tasklet_schedule(&sdev->task); + dev_dbg(chan2dev(chan), "vchan %p: issued\n", + &vchan->vc); + } + + spin_unlock(&sdev->lock); + } else { + dev_dbg(chan2dev(chan), "vchan %p: nothing to issue\n", + &vchan->vc); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static int sun6i_dma_alloc_chan_resources(struct dma_chan *chan) +{ + return 0; +} + +static void 
sun6i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&sdev->lock, flags); + list_del_init(&vchan->node); + spin_unlock_irqrestore(&sdev->lock, flags); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct dma_chan *sun6i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun6i_dma_dev *sdev = ofdma->of_dma_data; + struct sun6i_vchan *vchan; + struct dma_chan *chan; + u8 port = dma_spec->args[0]; + + if (port > NR_MAX_REQUESTS) + return NULL; + + chan = dma_get_any_slave_channel(&sdev->slave); + if (!chan) + return NULL; + + vchan = to_sun6i_vchan(chan); + vchan->port = port; + + return chan; +} + +static inline void sun6i_kill_tasklet(struct sun6i_dma_dev *sdev) +{ + /* Disable all interrupts from DMA */ + writel(0, sdev->base + DMA_IRQ_EN(0)); + writel(0, sdev->base + DMA_IRQ_EN(1)); + + /* Prevent spurious interrupts from scheduling the tasklet */ + atomic_inc(&sdev->tasklet_shutdown); + + /* Make sure all interrupts are handled */ + synchronize_irq(sdev->irq); + + /* Actually prevent the tasklet from being scheduled */ + tasklet_kill(&sdev->task); +} + +static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev) +{ + int i; + + for (i = 0; i < NR_MAX_VCHANS; i++) { + struct sun6i_vchan *vchan = &sdev->vchans[i]; + + list_del(&vchan->vc.chan.device_node); + tasklet_kill(&vchan->vc.task); + } +} + +static int sun6i_dma_probe(struct platform_device *pdev) +{ + struct sun6i_dma_dev *sdc; + struct resource *res; + struct clk *mux, *pll6; + int ret, i; + + sdc = devm_kzalloc(&pdev->dev, sizeof(*sdc), GFP_KERNEL); + if (!sdc) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + sdc->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(sdc->base)) + return PTR_ERR(sdc->base); + + sdc->irq = platform_get_irq(pdev, 0); + if (sdc->irq < 0) 
{ + dev_err(&pdev->dev, "Cannot claim IRQ\n"); + return sdc->irq; + } + + sdc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(sdc->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(sdc->clk); + } + + mux = clk_get(NULL, "ahb1_mux"); + if (IS_ERR(mux)) { + dev_err(&pdev->dev, "Couldn't get AHB1 Mux\n"); + return PTR_ERR(mux); + } + + pll6 = clk_get(NULL, "pll6"); + if (IS_ERR(pll6)) { + dev_err(&pdev->dev, "Couldn't get PLL6\n"); + clk_put(mux); + return PTR_ERR(pll6); + } + + ret = clk_set_parent(mux, pll6); + clk_put(pll6); + clk_put(mux); + + if (ret) { + dev_err(&pdev->dev, "Couldn't reparent AHB1 on PLL6\n"); + return ret; + } + + sdc->rstc = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(sdc->rstc)) { + dev_err(&pdev->dev, "No reset controller specified\n"); + return PTR_ERR(sdc->rstc); + } + + sdc->pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev, + sizeof(struct sun6i_dma_lli), 4, 0); + if (!sdc->pool) { + dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, sdc); + INIT_LIST_HEAD(&sdc->pending); + spin_lock_init(&sdc->lock); + + dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask); + dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask); + + INIT_LIST_HEAD(&sdc->slave.channels); + sdc->slave.device_alloc_chan_resources = sun6i_dma_alloc_chan_resources; + sdc->slave.device_free_chan_resources = sun6i_dma_free_chan_resources; + sdc->slave.device_tx_status = sun6i_dma_tx_status; + sdc->slave.device_issue_pending = sun6i_dma_issue_pending; + sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; + sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; + sdc->slave.device_control = sun6i_dma_control; + sdc->slave.chancnt = NR_MAX_VCHANS; + + sdc->slave.dev = &pdev->dev; + + sdc->pchans = devm_kcalloc(&pdev->dev, NR_MAX_CHANNELS, + sizeof(struct sun6i_pchan), GFP_KERNEL); + if (!sdc->pchans) + return -ENOMEM; + + 
sdc->vchans = devm_kcalloc(&pdev->dev, NR_MAX_VCHANS, + sizeof(struct sun6i_vchan), GFP_KERNEL); + if (!sdc->vchans) + return -ENOMEM; + + tasklet_init(&sdc->task, sun6i_dma_tasklet, (unsigned long)sdc); + + for (i = 0; i < NR_MAX_CHANNELS; i++) { + struct sun6i_pchan *pchan = &sdc->pchans[i]; + + pchan->idx = i; + pchan->base = sdc->base + 0x100 + i * 0x40; + } + + for (i = 0; i < NR_MAX_VCHANS; i++) { + struct sun6i_vchan *vchan = &sdc->vchans[i]; + + INIT_LIST_HEAD(&vchan->node); + vchan->vc.desc_free = sun6i_dma_free_desc; + vchan_init(&vchan->vc, &sdc->slave); + } + + ret = reset_control_deassert(sdc->rstc); + if (ret) { + dev_err(&pdev->dev, "Couldn't deassert the device from reset\n"); + goto err_chan_free; + } + + ret = clk_prepare_enable(sdc->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + goto err_reset_assert; + } + + ret = devm_request_irq(&pdev->dev, sdc->irq, sun6i_dma_interrupt, 0, + dev_name(&pdev->dev), sdc); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_clk_disable; + } + + ret = dma_async_device_register(&sdc->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_irq_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun6i_dma_of_xlate, + sdc); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&sdc->slave); +err_irq_disable: + sun6i_kill_tasklet(sdc); +err_clk_disable: + clk_disable_unprepare(sdc->clk); +err_reset_assert: + reset_control_assert(sdc->rstc); +err_chan_free: + sun6i_dma_free(sdc); + return ret; +} + +static int sun6i_dma_remove(struct platform_device *pdev) +{ + struct sun6i_dma_dev *sdc = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&sdc->slave); + + sun6i_kill_tasklet(sdc); + + clk_disable_unprepare(sdc->clk); + 
reset_control_assert(sdc->rstc); + + sun6i_dma_free(sdc); + + return 0; +} + +static struct of_device_id sun6i_dma_match[] = { + { .compatible = "allwinner,sun6i-a31-dma" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_dma_driver = { + .probe = sun6i_dma_probe, + .remove = sun6i_dma_remove, + .driver = { + .name = "sun6i-dma", + .of_match_table = sun6i_dma_match, + }, +}; +module_platform_driver(sun6i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A31 DMA Controller Driver"); +MODULE_AUTHOR("Sugar "); +MODULE_AUTHOR("Maxime Ripard "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From b45b262cefd5b8eb2ba88d20e5bd295881293894 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Sat, 19 Jul 2014 12:48:51 +0200 Subject: dmaengine: add a driver for AMBA AXI NBPF DMAC IP cores This patch adds a driver for NBPF DMAC IP cores from Renesas, designed for the AMBA AXI bus. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 6 + drivers/dma/Makefile | 1 + drivers/dma/nbpfaxi.c | 1511 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1518 insertions(+) create mode 100644 drivers/dma/nbpfaxi.c (limited to 'drivers/dma/Makefile') diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index d4f0415894c..901818c1b37 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -383,6 +383,12 @@ config DMA_SUN6I help Support for the DMA engine for Allwinner A31 SoCs. 
+config NBPFAXI_DMA + tristate "Renesas Type-AXI NBPF DMA support" + select DMA_ENGINE + help + Support for "Type-AXI" NBPF DMA IPs from Renesas + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index d08bd966da1..f2b831a994c 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -47,4 +47,5 @@ obj-$(CONFIG_MOXART_DMA) += moxart-dma.o obj-$(CONFIG_FSL_EDMA) += fsl-edma.o obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o obj-y += xilinx/ +obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c new file mode 100644 index 00000000000..77c5a890a30 --- /dev/null +++ b/drivers/dma/nbpfaxi.c @@ -0,0 +1,1511 @@ +/* + * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd. + * Author: Guennadi Liakhovetski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dmaengine.h" + +#define NBPF_REG_CHAN_OFFSET 0 +#define NBPF_REG_CHAN_SIZE 0x40 + +/* Channel Current Transaction Byte register */ +#define NBPF_CHAN_CUR_TR_BYTE 0x20 + +/* Channel Status register */ +#define NBPF_CHAN_STAT 0x24 +#define NBPF_CHAN_STAT_EN 1 +#define NBPF_CHAN_STAT_TACT 4 +#define NBPF_CHAN_STAT_ERR 0x10 +#define NBPF_CHAN_STAT_END 0x20 +#define NBPF_CHAN_STAT_TC 0x40 +#define NBPF_CHAN_STAT_DER 0x400 + +/* Channel Control register */ +#define NBPF_CHAN_CTRL 0x28 +#define NBPF_CHAN_CTRL_SETEN 1 +#define NBPF_CHAN_CTRL_CLREN 2 +#define NBPF_CHAN_CTRL_STG 4 +#define NBPF_CHAN_CTRL_SWRST 8 +#define NBPF_CHAN_CTRL_CLRRQ 0x10 +#define NBPF_CHAN_CTRL_CLREND 0x20 +#define NBPF_CHAN_CTRL_CLRTC 0x40 +#define NBPF_CHAN_CTRL_SETSUS 0x100 +#define NBPF_CHAN_CTRL_CLRSUS 0x200 + +/* Channel Configuration register */ +#define NBPF_CHAN_CFG 0x2c +#define NBPF_CHAN_CFG_SEL 7 /* terminal SELect: 0..7 */ +#define NBPF_CHAN_CFG_REQD 8 /* REQuest Direction: DMAREQ is 0: input, 1: output */ +#define NBPF_CHAN_CFG_LOEN 0x10 /* LOw ENable: low DMA request line is: 0: inactive, 1: active */ +#define NBPF_CHAN_CFG_HIEN 0x20 /* HIgh ENable: high DMA request line is: 0: inactive, 1: active */ +#define NBPF_CHAN_CFG_LVL 0x40 /* LeVeL: DMA request line is sensed as 0: edge, 1: level */ +#define NBPF_CHAN_CFG_AM 0x700 /* ACK Mode: 0: Pulse mode, 1: Level mode, b'1x: Bus Cycle */ +#define NBPF_CHAN_CFG_SDS 0xf000 /* Source Data Size: 0: 8 bits,... 
, 7: 1024 bits */ +#define NBPF_CHAN_CFG_DDS 0xf0000 /* Destination Data Size: as above */ +#define NBPF_CHAN_CFG_SAD 0x100000 /* Source ADdress counting: 0: increment, 1: fixed */ +#define NBPF_CHAN_CFG_DAD 0x200000 /* Destination ADdress counting: 0: increment, 1: fixed */ +#define NBPF_CHAN_CFG_TM 0x400000 /* Transfer Mode: 0: single, 1: block TM */ +#define NBPF_CHAN_CFG_DEM 0x1000000 /* DMAEND interrupt Mask */ +#define NBPF_CHAN_CFG_TCM 0x2000000 /* DMATCO interrupt Mask */ +#define NBPF_CHAN_CFG_SBE 0x8000000 /* Sweep Buffer Enable */ +#define NBPF_CHAN_CFG_RSEL 0x10000000 /* RM: Register Set sELect */ +#define NBPF_CHAN_CFG_RSW 0x20000000 /* RM: Register Select sWitch */ +#define NBPF_CHAN_CFG_REN 0x40000000 /* RM: Register Set Enable */ +#define NBPF_CHAN_CFG_DMS 0x80000000 /* 0: register mode (RM), 1: link mode (LM) */ + +#define NBPF_CHAN_NXLA 0x38 +#define NBPF_CHAN_CRLA 0x3c + +/* Link Header field */ +#define NBPF_HEADER_LV 1 +#define NBPF_HEADER_LE 2 +#define NBPF_HEADER_WBD 4 +#define NBPF_HEADER_DIM 8 + +#define NBPF_CTRL 0x300 +#define NBPF_CTRL_PR 1 /* 0: fixed priority, 1: round robin */ +#define NBPF_CTRL_LVINT 2 /* DMAEND and DMAERR signalling: 0: pulse, 1: level */ + +#define NBPF_DSTAT_ER 0x314 +#define NBPF_DSTAT_END 0x318 + +#define NBPF_DMA_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +struct nbpf_config { + int num_channels; + int buffer_size; +}; + +/* + * We've got 3 types of objects, used to describe DMA transfers: + * 1. high-level descriptor, containing a struct dma_async_tx_descriptor object + * in it, used to communicate with the user + * 2. hardware DMA link descriptors, that we pass to DMAC for DMA transfer + * queuing, these must be DMAable, using either the streaming DMA API or + * allocated from coherent memory - one per SG segment + * 3. 
one per SG segment descriptors, used to manage HW link descriptors from
+ *    (2). They do not have to be DMAable. They can either be (a) allocated
+ *    together with link descriptors as mixed (DMA / CPU) objects, or (b)
+ *    separately. Even if allocated separately it would be best to link them
+ *    to link descriptors once during channel resource allocation and always
+ *    use them as a single object.
+ * Therefore for both cases (a) and (b) at run-time objects (2) and (3) shall be
+ * treated as a single SG segment descriptor.
+ */
+
+/**
+ * struct nbpf_link_reg - hardware link descriptor, object (2) above
+ * @header:		NBPF_HEADER_* flags, filled in by nbpf_prep_one()
+ * @src_addr:		source bus address of this segment
+ * @dst_addr:		destination bus address of this segment
+ * @transaction_size:	number of bytes to transfer for this segment
+ * @config:		per-segment NBPF_CHAN_CFG_* value
+ * @interval:		not written by any code visible in this part of the
+ *			driver
+ * @extension:		not written by any code visible in this part of the
+ *			driver
+ * @next:		bus address of the next link descriptor, 0 in the last
+ *			one of a chain (see nbpf_desc_get())
+ */
+struct nbpf_link_reg {
+	u32	header;
+	u32	src_addr;
+	u32	dst_addr;
+	u32	transaction_size;
+	u32	config;
+	u32	interval;
+	u32	extension;
+	u32	next;
+} __packed;
+
+struct nbpf_device;
+struct nbpf_channel;
+struct nbpf_desc;
+
+/**
+ * struct nbpf_link_desc - CPU-side companion of one hardware link descriptor
+ * @hwdesc:		the hardware link descriptor managed by this object
+ * @hwdesc_dma_addr:	bus address of @hwdesc as returned by dma_map_single()
+ * @desc:		transfer descriptor this link has been handed to
+ * @node:		member of the channel's free_links list or of the sg
+ *			list of @desc
+ */
+struct nbpf_link_desc {
+	struct nbpf_link_reg *hwdesc;
+	dma_addr_t hwdesc_dma_addr;
+	struct nbpf_desc *desc;
+	struct list_head node;
+};
+
+/**
+ * struct nbpf_desc - DMA transfer descriptor
+ * @async_tx:	dmaengine object
+ * @user_wait:	waiting for a user ack
+ * @length:	total transfer length
+ * @chan:	channel, that this descriptor belongs to
+ * @sg:		list of hardware descriptors, represented by struct nbpf_link_desc
+ * @node:	member in channel descriptor lists
+ */
+struct nbpf_desc {
+	struct dma_async_tx_descriptor async_tx;
+	bool user_wait;
+	size_t length;
+	struct nbpf_channel *chan;
+	struct list_head sg;
+	struct list_head node;
+};
+
+/* Take a wild guess: allocate 4 segments per descriptor */
+#define NBPF_SEGMENTS_PER_DESC 4
+#define NBPF_DESCS_PER_PAGE ((PAGE_SIZE - sizeof(struct list_head)) /	\
+	(sizeof(struct nbpf_desc) +					\
+	 NBPF_SEGMENTS_PER_DESC *					\
+	 (sizeof(struct nbpf_link_desc) + sizeof(struct nbpf_link_reg))))
+#define NBPF_SEGMENTS_PER_PAGE (NBPF_SEGMENTS_PER_DESC * NBPF_DESCS_PER_PAGE)
+
+/**
+ * struct nbpf_desc_page - one page worth of descriptors
+ * @node:	member of the channel's desc_page list
+ * @desc:	transfer descriptors
+ * @ldesc:	link descriptor bookkeeping objects, one per entry in @hwdesc
+ * @hwdesc:	hardware link descriptors, each one mapped for DMA separately
+ *
+ * The object is carved out of a single page obtained with get_zeroed_page(),
+ * which is why the array sizes above are derived from PAGE_SIZE.
+ */
+struct nbpf_desc_page {
+	struct list_head node;
+	struct nbpf_desc desc[NBPF_DESCS_PER_PAGE];
+	struct nbpf_link_desc ldesc[NBPF_SEGMENTS_PER_PAGE];
+	struct nbpf_link_reg hwdesc[NBPF_SEGMENTS_PER_PAGE];
+};
+
+/**
+ * struct nbpf_channel -
one DMAC channel
+ * @dma_chan:	standard dmaengine channel object
+ * @base:	register address base
+ * @nbpf:	DMAC
+ * @name:	IRQ name
+ * @irq:	IRQ number
+ * @slave_src_addr:	source address for slave DMA
+ * @slave_src_width:	source slave data size, stored in the encoding returned
+ *			by nbpf_xfer_size() (e.g. 3 for 8 bytes), not in bytes
+ * @slave_src_burst:	maximum source slave burst size, same encoding
+ * @slave_dst_addr:	destination address for slave DMA
+ * @slave_dst_width:	destination slave data size, same encoding
+ * @slave_dst_burst:	maximum destination slave burst size, same encoding
+ * @terminal:	DMA terminal, assigned to this channel
+ * @dmarq_cfg:	DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG
+ * @flags:	configuration flags from DT
+ * @lock:	protect descriptor lists
+ * @free_links:	list of free link descriptors
+ * @free:	list of free descriptors
+ * @queued:	list of queued descriptors
+ * @active:	list of descriptors, scheduled for processing
+ * @done:	list of completed descriptors, waiting post-processing
+ * @desc_page:	list of additionally allocated descriptor pages - if any
+ * @running:	descriptor, that has been handed to nbpf_start(), NULL if none
+ * @paused:	set by DMA_PAUSE, cleared again by nbpf_start()
+ */
+struct nbpf_channel {
+	struct dma_chan dma_chan;
+	void __iomem *base;
+	struct nbpf_device *nbpf;
+	char name[16];
+	int irq;
+	dma_addr_t slave_src_addr;
+	size_t slave_src_width;
+	size_t slave_src_burst;
+	dma_addr_t slave_dst_addr;
+	size_t slave_dst_width;
+	size_t slave_dst_burst;
+	unsigned int terminal;
+	u32 dmarq_cfg;
+	unsigned long flags;
+	spinlock_t lock;
+	struct list_head free_links;
+	struct list_head free;
+	struct list_head queued;
+	struct list_head active;
+	struct list_head done;
+	struct list_head desc_page;
+	struct nbpf_desc *running;
+	bool paused;
+};
+
+/**
+ * struct nbpf_device - one DMAC instance
+ * @dma_dev:	dmaengine device object
+ * @base:	register address base
+ * @clk:	clock, obtained with devm_clk_get() in nbpf_probe()
+ * @config:	model-specific configuration, one entry of nbpf_cfg[]
+ * @chan:	channels, num_channels entries are allocated in nbpf_probe()
+ */
+struct nbpf_device {
+	struct dma_device dma_dev;
+	void __iomem *base;
+	struct clk *clk;
+	const struct nbpf_config *config;
+	struct nbpf_channel chan[];
+};
+
+/*
+ * Indices into nbpf_cfg[]: NBPF<number of channels>B<buffer size>, matching
+ * the values stored in the respective nbpf_cfg[] entry.
+ */
+enum nbpf_model {
+	NBPF1B4,
+	NBPF1B8,
+	NBPF1B16,
+	NBPF4B4,
+	NBPF4B8,
+	NBPF4B16,
+	NBPF8B4,
+	NBPF8B8,
+	NBPF8B16,
+};
+
+static struct nbpf_config nbpf_cfg[] = {
+	[NBPF1B4] = {
+		.num_channels = 1,
+		.buffer_size = 4,
+	},
+	[NBPF1B8] = {
+		.num_channels = 1,
+		.buffer_size = 8,
+	},
+	[NBPF1B16] = {
+		.num_channels = 1,
+		.buffer_size = 16,
+	},
+	[NBPF4B4] = {
+		.num_channels = 4,
+		.buffer_size
= 4, + }, + [NBPF4B8] = { + .num_channels = 4, + .buffer_size = 8, + }, + [NBPF4B16] = { + .num_channels = 4, + .buffer_size = 16, + }, + [NBPF8B4] = { + .num_channels = 8, + .buffer_size = 4, + }, + [NBPF8B8] = { + .num_channels = 8, + .buffer_size = 8, + }, + [NBPF8B16] = { + .num_channels = 8, + .buffer_size = 16, + }, +}; + +#define nbpf_to_chan(d) container_of(d, struct nbpf_channel, dma_chan) + +/* + * dmaengine drivers seem to have a lot in common and instead of sharing more + * code, they reimplement those common algorithms independently. In this driver + * we try to separate the hardware-specific part from the (largely) generic + * part. This improves code readability and makes it possible in the future to + * reuse the generic code in form of a helper library. That generic code should + * be suitable for various DMA controllers, using transfer descriptors in RAM + * and pushing one SG list at a time to the DMA controller. + */ + +/* Hardware-specific part */ + +static inline u32 nbpf_chan_read(struct nbpf_channel *chan, + unsigned int offset) +{ + u32 data = ioread32(chan->base + offset); + dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, chan->base, offset, data); + return data; +} + +static inline void nbpf_chan_write(struct nbpf_channel *chan, + unsigned int offset, u32 data) +{ + iowrite32(data, chan->base + offset); + dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, chan->base, offset, data); +} + +static inline u32 nbpf_read(struct nbpf_device *nbpf, + unsigned int offset) +{ + u32 data = ioread32(nbpf->base + offset); + dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, nbpf->base, offset, data); + return data; +} + +static inline void nbpf_write(struct nbpf_device *nbpf, + unsigned int offset, u32 data) +{ + iowrite32(data, nbpf->base + offset); + dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, nbpf->base, offset, data); +} + +static void nbpf_chan_halt(struct 
nbpf_channel *chan)
+{
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN);
+}
+
+/*
+ * Return true if this channel's bit is set in the NBPF_DSTAT_END register.
+ * The bit position is the channel's index in nbpf->chan[].
+ */
+static bool nbpf_status_get(struct nbpf_channel *chan)
+{
+	u32 status = nbpf_read(chan->nbpf, NBPF_DSTAT_END);
+
+	return status & BIT(chan - chan->nbpf->chan);
+}
+
+/* Acknowledge the END status by writing CLREND to the channel control register */
+static void nbpf_status_ack(struct nbpf_channel *chan)
+{
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREND);
+}
+
+/* Read the NBPF_DSTAT_ER register, the result is used as a per-channel bitmask */
+static u32 nbpf_error_get(struct nbpf_device *nbpf)
+{
+	return nbpf_read(nbpf, NBPF_DSTAT_ER);
+}
+
+/*
+ * Return the channel, corresponding to the lowest set bit in @error. @error
+ * must not be 0, nbpf_err_irq() checks that before calling. Only used within
+ * this file, hence static like all other helpers here.
+ */
+static struct nbpf_channel *nbpf_error_get_channel(struct nbpf_device *nbpf, u32 error)
+{
+	return nbpf->chan + __ffs(error);
+}
+
+/*
+ * Recover a channel after an error: halt it, poll up to 1000 times for the
+ * TACT status bit to clear, complain if it never does and finally issue a
+ * software reset of the channel.
+ */
+static void nbpf_error_clear(struct nbpf_channel *chan)
+{
+	u32 status;
+	int i;
+
+	/* Stop the channel, make sure DMA has been aborted */
+	nbpf_chan_halt(chan);
+
+	for (i = 1000; i; i--) {
+		status = nbpf_chan_read(chan, NBPF_CHAN_STAT);
+		if (!(status & NBPF_CHAN_STAT_TACT))
+			break;
+		cpu_relax();
+	}
+
+	/* i only reaches 0 if TACT was still set on every poll */
+	if (!i)
+		dev_err(chan->dma_chan.device->dev,
+			"%s(): abort timeout, channel status 0x%x\n", __func__, status);
+
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SWRST);
+}
+
+/*
+ * Hand a transfer descriptor to the hardware: program the bus address of its
+ * first link descriptor, enable the channel and clear a possible suspend.
+ * Always returns 0; callers only mark the descriptor as running on success.
+ */
+static int nbpf_start(struct nbpf_desc *desc)
+{
+	struct nbpf_channel *chan = desc->chan;
+	struct nbpf_link_desc *ldesc = list_first_entry(&desc->sg, struct nbpf_link_desc, node);
+
+	nbpf_chan_write(chan, NBPF_CHAN_NXLA, (u32)ldesc->hwdesc_dma_addr);
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETEN | NBPF_CHAN_CTRL_CLRSUS);
+	chan->paused = false;
+
+	/* Software trigger MEMCPY - only MEMCPY uses the block mode */
+	if (ldesc->hwdesc->config & NBPF_CHAN_CFG_TM)
+		nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_STG);
+
+	dev_dbg(chan->nbpf->dma_dev.dev, "%s(): next 0x%x, cur 0x%x\n", __func__,
+		nbpf_chan_read(chan, NBPF_CHAN_NXLA), nbpf_chan_read(chan, NBPF_CHAN_CRLA));
+
+	return 0;
+}
+
+/*
+ * Translate the DT flags and the terminal number of a slave channel into the
+ * request line part of the channel configuration register value.
+ */
+static void nbpf_chan_prepare(struct nbpf_channel *chan)
+{
+	chan->dmarq_cfg = (chan->flags & NBPF_SLAVE_RQ_HIGH ?
NBPF_CHAN_CFG_HIEN : 0) |
+		(chan->flags & NBPF_SLAVE_RQ_LOW ? NBPF_CHAN_CFG_LOEN : 0) |
+		(chan->flags & NBPF_SLAVE_RQ_LEVEL ?
+		 NBPF_CHAN_CFG_LVL | (NBPF_CHAN_CFG_AM & 0x200) : 0) |
+		chan->terminal;
+}
+
+/* Reset the request line configuration to the default: terminal 0, no flags */
+static void nbpf_chan_prepare_default(struct nbpf_channel *chan)
+{
+	/* Don't output DMAACK */
+	chan->dmarq_cfg = NBPF_CHAN_CFG_AM & 0x400;
+	chan->terminal = 0;
+	chan->flags = 0;
+}
+
+/* Write the static part of the channel configuration to the hardware */
+static void nbpf_chan_configure(struct nbpf_channel *chan)
+{
+	/*
+	 * We assume, that only the link mode and DMA request line configuration
+	 * have to be set in the configuration register manually. Dynamic
+	 * per-transfer configuration will be loaded from transfer descriptors.
+	 */
+	nbpf_chan_write(chan, NBPF_CHAN_CFG, NBPF_CHAN_CFG_DMS | chan->dmarq_cfg);
+}
+
+/*
+ * Encode a transfer size in bytes as a data size exponent: the index of the
+ * lowest set bit in @size, limited to ilog2() of the buffer size times 8.
+ * NOTE(review): the kernel documents __ffs() as undefined for 0, so @size is
+ * presumably never 0 here - confirm against the callers.
+ */
+static u32 nbpf_xfer_ds(struct nbpf_device *nbpf, size_t size)
+{
+	/* Maximum supported bursts depend on the buffer size */
+	return min_t(int, __ffs(size), ilog2(nbpf->config->buffer_size * 8));
+}
+
+/*
+ * Multiply the bus width in bytes by @burst and return the result in the
+ * nbpf_xfer_ds() encoding. A @burst of 0 is treated as 1. An unsupported
+ * @width triggers a warning and is then handled like a 1-byte wide bus.
+ */
+static size_t nbpf_xfer_size(struct nbpf_device *nbpf,
+			     enum dma_slave_buswidth width, u32 burst)
+{
+	size_t size;
+
+	if (!burst)
+		burst = 1;
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		size = 8 * burst;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		size = 4 * burst;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		size = 2 * burst;
+		break;
+
+	default:
+		pr_warn("%s(): invalid bus width %u\n", __func__, width);
+		/* fall through - deliberately continue with a 1-byte width */
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		size = burst;
+	}
+
+	return nbpf_xfer_ds(nbpf, size);
+}
+
+/*
+ * We need a way to recognise slaves, whose data is sent "raw" over the bus,
+ * i.e. it isn't known in advance how many bytes will be received. Therefore
+ * the slave driver has to provide a "large enough" buffer and either read the
+ * buffer, when it is full, or detect, that some data has arrived, then wait for
+ * a timeout, if no more data arrives - receive what's already there.
We want to + * handle such slaves in a special way to allow an optimised mode for other + * users, for whom the amount of data is known in advance. So far there's no way + * to recognise such slaves. We use a data-width check to distinguish between + * the SD host and the PL011 UART. + */ + +static int nbpf_prep_one(struct nbpf_link_desc *ldesc, + enum dma_transfer_direction direction, + dma_addr_t src, dma_addr_t dst, size_t size, bool last) +{ + struct nbpf_link_reg *hwdesc = ldesc->hwdesc; + struct nbpf_desc *desc = ldesc->desc; + struct nbpf_channel *chan = desc->chan; + struct device *dev = chan->dma_chan.device->dev; + size_t mem_xfer, slave_xfer; + bool can_burst; + + hwdesc->header = NBPF_HEADER_WBD | NBPF_HEADER_LV | + (last ? NBPF_HEADER_LE : 0); + + hwdesc->src_addr = src; + hwdesc->dst_addr = dst; + hwdesc->transaction_size = size; + + /* + * set config: SAD, DAD, DDS, SDS, etc. + * Note on transfer sizes: the DMAC can perform unaligned DMA transfers, + * but it is important to have transaction size a multiple of both + * receiver and transmitter transfer sizes. It is also possible to use + * different RAM and device transfer sizes, and it does work well with + * some devices, e.g. with V08R07S01E SD host controllers, which can use + * 128 byte transfers. But this doesn't work with other devices, + * especially when the transaction size is unknown. This is the case, + * e.g. with serial drivers like amba-pl011.c. For reception it sets up + * the transaction size of 4K and if fewer bytes are received, it + * pauses DMA and reads out data received via DMA as well as those left + * in the Rx FIFO. For this to work with the RAM side using burst + * transfers we enable the SBE bit and terminate the transfer in our + * DMA_PAUSE handler. + */ + mem_xfer = nbpf_xfer_ds(chan->nbpf, size); + + switch (direction) { + case DMA_DEV_TO_MEM: + can_burst = chan->slave_src_width >= 3; + slave_xfer = min(mem_xfer, can_burst ? 
+ chan->slave_src_burst : chan->slave_src_width); + /* + * Is the slave narrower than 64 bits, i.e. isn't using the full + * bus width and cannot use bursts? + */ + if (mem_xfer > chan->slave_src_burst && !can_burst) + mem_xfer = chan->slave_src_burst; + /* Device-to-RAM DMA is unreliable without REQD set */ + hwdesc->config = NBPF_CHAN_CFG_SAD | (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)) | + (NBPF_CHAN_CFG_SDS & (slave_xfer << 12)) | NBPF_CHAN_CFG_REQD | + NBPF_CHAN_CFG_SBE; + break; + + case DMA_MEM_TO_DEV: + slave_xfer = min(mem_xfer, chan->slave_dst_width >= 3 ? + chan->slave_dst_burst : chan->slave_dst_width); + hwdesc->config = NBPF_CHAN_CFG_DAD | (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) | + (NBPF_CHAN_CFG_DDS & (slave_xfer << 16)) | NBPF_CHAN_CFG_REQD; + break; + + case DMA_MEM_TO_MEM: + hwdesc->config = NBPF_CHAN_CFG_TCM | NBPF_CHAN_CFG_TM | + (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) | + (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)); + break; + + default: + return -EINVAL; + } + + hwdesc->config |= chan->dmarq_cfg | (last ? 
0 : NBPF_CHAN_CFG_DEM) | + NBPF_CHAN_CFG_DMS; + + dev_dbg(dev, "%s(): desc @ %pad: hdr 0x%x, cfg 0x%x, %zu @ %pad -> %pad\n", + __func__, &ldesc->hwdesc_dma_addr, hwdesc->header, + hwdesc->config, size, &src, &dst); + + dma_sync_single_for_device(dev, ldesc->hwdesc_dma_addr, sizeof(*hwdesc), + DMA_TO_DEVICE); + + return 0; +} + +static size_t nbpf_bytes_left(struct nbpf_channel *chan) +{ + return nbpf_chan_read(chan, NBPF_CHAN_CUR_TR_BYTE); +} + +static void nbpf_configure(struct nbpf_device *nbpf) +{ + nbpf_write(nbpf, NBPF_CTRL, NBPF_CTRL_LVINT); +} + +static void nbpf_pause(struct nbpf_channel *chan) +{ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETSUS); + /* See comment in nbpf_prep_one() */ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN); +} + +/* Generic part */ + +/* DMA ENGINE functions */ +static void nbpf_issue_pending(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + unsigned long flags; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + spin_lock_irqsave(&chan->lock, flags); + if (list_empty(&chan->queued)) + goto unlock; + + list_splice_tail_init(&chan->queued, &chan->active); + + if (!chan->running) { + struct nbpf_desc *desc = list_first_entry(&chan->active, + struct nbpf_desc, node); + if (!nbpf_start(desc)) + chan->running = desc; + } + +unlock: + spin_unlock_irqrestore(&chan->lock, flags); +} + +static enum dma_status nbpf_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + enum dma_status status = dma_cookie_status(dchan, cookie, state); + + if (state) { + dma_cookie_t running; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + running = chan->running ? 
chan->running->async_tx.cookie : -EINVAL; + + if (cookie == running) { + state->residue = nbpf_bytes_left(chan); + dev_dbg(dchan->device->dev, "%s(): residue %u\n", __func__, + state->residue); + } else if (status == DMA_IN_PROGRESS) { + struct nbpf_desc *desc; + bool found = false; + + list_for_each_entry(desc, &chan->active, node) + if (desc->async_tx.cookie == cookie) { + found = true; + break; + } + + if (!found) + list_for_each_entry(desc, &chan->queued, node) + if (desc->async_tx.cookie == cookie) { + found = true; + break; + + } + + state->residue = found ? desc->length : 0; + } + + spin_unlock_irqrestore(&chan->lock, flags); + } + + if (chan->paused) + status = DMA_PAUSED; + + return status; +} + +static dma_cookie_t nbpf_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct nbpf_desc *desc = container_of(tx, struct nbpf_desc, async_tx); + struct nbpf_channel *chan = desc->chan; + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&chan->lock, flags); + cookie = dma_cookie_assign(tx); + list_add_tail(&desc->node, &chan->queued); + spin_unlock_irqrestore(&chan->lock, flags); + + dev_dbg(chan->dma_chan.device->dev, "Entry %s(%d)\n", __func__, cookie); + + return cookie; +} + +static int nbpf_desc_page_alloc(struct nbpf_channel *chan) +{ + struct dma_chan *dchan = &chan->dma_chan; + struct nbpf_desc_page *dpage = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + struct nbpf_link_desc *ldesc; + struct nbpf_link_reg *hwdesc; + struct nbpf_desc *desc; + LIST_HEAD(head); + LIST_HEAD(lhead); + int i; + struct device *dev = dchan->device->dev; + + if (!dpage) + return -ENOMEM; + + dev_dbg(dev, "%s(): alloc %lu descriptors, %lu segments, total alloc %zu\n", + __func__, NBPF_DESCS_PER_PAGE, NBPF_SEGMENTS_PER_PAGE, sizeof(*dpage)); + + for (i = 0, ldesc = dpage->ldesc, hwdesc = dpage->hwdesc; + i < ARRAY_SIZE(dpage->ldesc); + i++, ldesc++, hwdesc++) { + ldesc->hwdesc = hwdesc; + list_add_tail(&ldesc->node, &lhead); + ldesc->hwdesc_dma_addr = 
dma_map_single(dchan->device->dev, + hwdesc, sizeof(*hwdesc), DMA_TO_DEVICE); + + dev_dbg(dev, "%s(): mapped 0x%p to %pad\n", __func__, + hwdesc, &ldesc->hwdesc_dma_addr); + } + + for (i = 0, desc = dpage->desc; + i < ARRAY_SIZE(dpage->desc); + i++, desc++) { + dma_async_tx_descriptor_init(&desc->async_tx, dchan); + desc->async_tx.tx_submit = nbpf_tx_submit; + desc->chan = chan; + INIT_LIST_HEAD(&desc->sg); + list_add_tail(&desc->node, &head); + } + + /* + * This function cannot be called from interrupt context, so, no need to + * save flags + */ + spin_lock_irq(&chan->lock); + list_splice_tail(&lhead, &chan->free_links); + list_splice_tail(&head, &chan->free); + list_add(&dpage->node, &chan->desc_page); + spin_unlock_irq(&chan->lock); + + return ARRAY_SIZE(dpage->desc); +} + +static void nbpf_desc_put(struct nbpf_desc *desc) +{ + struct nbpf_channel *chan = desc->chan; + struct nbpf_link_desc *ldesc, *tmp; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + list_for_each_entry_safe(ldesc, tmp, &desc->sg, node) + list_move(&ldesc->node, &chan->free_links); + + list_add(&desc->node, &chan->free); + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void nbpf_scan_acked(struct nbpf_channel *chan) +{ + struct nbpf_desc *desc, *tmp; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->lock, flags); + list_for_each_entry_safe(desc, tmp, &chan->done, node) + if (async_tx_test_ack(&desc->async_tx) && desc->user_wait) { + list_move(&desc->node, &head); + desc->user_wait = false; + } + spin_unlock_irqrestore(&chan->lock, flags); + + list_for_each_entry_safe(desc, tmp, &head, node) { + list_del(&desc->node); + nbpf_desc_put(desc); + } +} + +/* + * We have to allocate descriptors with the channel lock dropped. This means, + * before we re-acquire the lock buffers can be taken already, so we have to + * re-check after re-acquiring the lock and possibly retry, if buffers are gone + * again. 
+ */
+static struct nbpf_desc *nbpf_desc_get(struct nbpf_channel *chan, size_t len)
+{
+	struct nbpf_desc *desc = NULL;
+	struct nbpf_link_desc *ldesc, *prev = NULL;
+
+	/* First recycle whatever the user has acked in the meantime */
+	nbpf_scan_acked(chan);
+
+	spin_lock_irq(&chan->lock);
+
+	do {
+		int i = 0, ret;
+
+		if (list_empty(&chan->free)) {
+			/* No more free descriptors */
+			spin_unlock_irq(&chan->lock);
+			ret = nbpf_desc_page_alloc(chan);
+			if (ret < 0)
+				return NULL;
+			spin_lock_irq(&chan->lock);
+			/* desc is still NULL, so this re-runs the outer loop */
+			continue;
+		}
+		desc = list_first_entry(&chan->free, struct nbpf_desc, node);
+		list_del(&desc->node);
+
+		/*
+		 * Collect len link descriptors and chain them via ->next. This
+		 * is a do-while loop, so one link is taken even for len == 0.
+		 */
+		do {
+			if (list_empty(&chan->free_links)) {
+				/* No more free link descriptors */
+				spin_unlock_irq(&chan->lock);
+				ret = nbpf_desc_page_alloc(chan);
+				if (ret < 0) {
+					/* Gives back the links collected so far too */
+					nbpf_desc_put(desc);
+					return NULL;
+				}
+				spin_lock_irq(&chan->lock);
+				/* i is unchanged, retry the same link */
+				continue;
+			}
+
+			ldesc = list_first_entry(&chan->free_links,
+						 struct nbpf_link_desc, node);
+			ldesc->desc = desc;
+			if (prev)
+				prev->hwdesc->next = (u32)ldesc->hwdesc_dma_addr;
+
+			prev = ldesc;
+			list_move_tail(&ldesc->node, &desc->sg);
+
+			i++;
+		} while (i < len);
+	} while (!desc);
+
+	/* Terminate the chain */
+	prev->hwdesc->next = 0;
+
+	spin_unlock_irq(&chan->lock);
+
+	return desc;
+}
+
+/*
+ * Drop all descriptors of a channel without calling any completion callback:
+ * the done, active and queued lists are emptied under the lock, the running
+ * pointer is cleared and the descriptors are returned to the free lists after
+ * the lock has been released. The hardware is not touched here.
+ */
+static void nbpf_chan_idle(struct nbpf_channel *chan)
+{
+	struct nbpf_desc *desc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	list_splice_init(&chan->done, &head);
+	list_splice_init(&chan->active, &head);
+	list_splice_init(&chan->queued, &head);
+
+	chan->running = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, tmp, &head, node) {
+		dev_dbg(chan->nbpf->dma_dev.dev, "%s(): force-free desc %p cookie %d\n",
+			__func__, desc, desc->async_tx.cookie);
+		list_del(&desc->node);
+		nbpf_desc_put(desc);
+	}
+}
+
+/*
+ * Channel control: handles DMA_TERMINATE_ALL, DMA_SLAVE_CONFIG (@arg is a
+ * struct dma_slave_config pointer) and DMA_PAUSE, any other command is
+ * rejected with -ENXIO.
+ * NOTE(review): presumably installed as the dmaengine device_control hook -
+ * the assignment is not visible in this part of the file.
+ */
+static int nbpf_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
+			unsigned long arg)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct dma_slave_config *config;
+
+	dev_dbg(dchan->device->dev, "Entry
%s(%d)\n", __func__, cmd);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		dev_dbg(dchan->device->dev, "Terminating\n");
+		/* Stop the hardware first, then drop all descriptors */
+		nbpf_chan_halt(chan);
+		nbpf_chan_idle(chan);
+		break;
+
+	case DMA_SLAVE_CONFIG:
+		if (!arg)
+			return -EINVAL;
+		config = (struct dma_slave_config *)arg;
+
+		/*
+		 * We could check config->slave_id to match chan->terminal here,
+		 * but with DT they would be coming from the same source, so
+		 * such a check would be superfluous
+		 */
+
+		/*
+		 * Widths and bursts are stored in the nbpf_xfer_size()
+		 * encoding, not in bytes. "width" uses a burst of 1.
+		 */
+		chan->slave_dst_addr = config->dst_addr;
+		chan->slave_dst_width = nbpf_xfer_size(chan->nbpf,
+						       config->dst_addr_width, 1);
+		chan->slave_dst_burst = nbpf_xfer_size(chan->nbpf,
+						       config->dst_addr_width,
+						       config->dst_maxburst);
+		chan->slave_src_addr = config->src_addr;
+		chan->slave_src_width = nbpf_xfer_size(chan->nbpf,
+						       config->src_addr_width, 1);
+		chan->slave_src_burst = nbpf_xfer_size(chan->nbpf,
+						       config->src_addr_width,
+						       config->src_maxburst);
+		break;
+
+	case DMA_PAUSE:
+		/* The flag is cleared again by the next nbpf_start() */
+		chan->paused = true;
+		nbpf_pause(chan);
+		break;
+
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Common descriptor preparation for memcpy, SG and slave transfers: get a
+ * transfer descriptor with @len link descriptors and fill each of them from
+ * the current @src_sg / @dst_sg entries. Only a side, that is in memory, is
+ * advanced through its scatterlist, the device side stays on its single
+ * entry. Returns NULL, if no descriptor could be obtained or prepared.
+ */
+static struct dma_async_tx_descriptor *nbpf_prep_sg(struct nbpf_channel *chan,
+		struct scatterlist *src_sg, struct scatterlist *dst_sg,
+		size_t len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct nbpf_link_desc *ldesc;
+	struct scatterlist *mem_sg;
+	struct nbpf_desc *desc;
+	bool inc_src, inc_dst;
+	size_t data_len = 0;
+	int i = 0;
+
+	/* mem_sg is the list, that segment lengths are taken from */
+	switch (direction) {
+	case DMA_DEV_TO_MEM:
+		mem_sg = dst_sg;
+		inc_src = false;
+		inc_dst = true;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		mem_sg = src_sg;
+		inc_src = true;
+		inc_dst = false;
+		break;
+
+	default:
+	case DMA_MEM_TO_MEM:
+		mem_sg = src_sg;
+		inc_src = true;
+		inc_dst = true;
+	}
+
+	desc = nbpf_desc_get(chan, len);
+	if (!desc)
+		return NULL;
+
+	desc->async_tx.flags = flags;
+	desc->async_tx.cookie = -EBUSY;
+	desc->user_wait = false;
+
+	/*
+	 * This is a private descriptor list, and we own the descriptor. No need
+	 * to lock.
+ */ + list_for_each_entry(ldesc, &desc->sg, node) { + int ret = nbpf_prep_one(ldesc, direction, + sg_dma_address(src_sg), + sg_dma_address(dst_sg), + sg_dma_len(mem_sg), + i == len - 1); + if (ret < 0) { + nbpf_desc_put(desc); + return NULL; + } + data_len += sg_dma_len(mem_sg); + if (inc_src) + src_sg = sg_next(src_sg); + if (inc_dst) + dst_sg = sg_next(dst_sg); + mem_sg = direction == DMA_DEV_TO_MEM ? dst_sg : src_sg; + i++; + } + + desc->length = data_len; + + /* The user has to return the descriptor to us ASAP via .tx_submit() */ + return &desc->async_tx; +} + +static struct dma_async_tx_descriptor *nbpf_prep_memcpy( + struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct scatterlist dst_sg; + struct scatterlist src_sg; + + sg_init_table(&dst_sg, 1); + sg_init_table(&src_sg, 1); + + sg_dma_address(&dst_sg) = dst; + sg_dma_address(&src_sg) = src; + + sg_dma_len(&dst_sg) = len; + sg_dma_len(&src_sg) = len; + + dev_dbg(dchan->device->dev, "%s(): %zu @ %pad -> %pad\n", + __func__, len, &src, &dst); + + return nbpf_prep_sg(chan, &src_sg, &dst_sg, 1, + DMA_MEM_TO_MEM, flags); +} + +static struct dma_async_tx_descriptor *nbpf_prep_memcpy_sg( + struct dma_chan *dchan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + + if (dst_nents != src_nents) + return NULL; + + return nbpf_prep_sg(chan, src_sg, dst_sg, src_nents, + DMA_MEM_TO_MEM, flags); +} + +static struct dma_async_tx_descriptor *nbpf_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, void *context) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct scatterlist slave_sg; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + sg_init_table(&slave_sg, 1); + + 
switch (direction) { + case DMA_MEM_TO_DEV: + sg_dma_address(&slave_sg) = chan->slave_dst_addr; + return nbpf_prep_sg(chan, sgl, &slave_sg, sg_len, + direction, flags); + + case DMA_DEV_TO_MEM: + sg_dma_address(&slave_sg) = chan->slave_src_addr; + return nbpf_prep_sg(chan, &slave_sg, sgl, sg_len, + direction, flags); + + default: + return NULL; + } +} + +static int nbpf_alloc_chan_resources(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + int ret; + + INIT_LIST_HEAD(&chan->free); + INIT_LIST_HEAD(&chan->free_links); + INIT_LIST_HEAD(&chan->queued); + INIT_LIST_HEAD(&chan->active); + INIT_LIST_HEAD(&chan->done); + + ret = nbpf_desc_page_alloc(chan); + if (ret < 0) + return ret; + + dev_dbg(dchan->device->dev, "Entry %s(): terminal %u\n", __func__, + chan->terminal); + + nbpf_chan_configure(chan); + + return ret; +} + +static void nbpf_free_chan_resources(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct nbpf_desc_page *dpage, *tmp; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + nbpf_chan_halt(chan); + /* Clean up for if a channel is re-used for MEMCPY after slave DMA */ + nbpf_chan_prepare_default(chan); + + list_for_each_entry_safe(dpage, tmp, &chan->desc_page, node) { + struct nbpf_link_desc *ldesc; + int i; + list_del(&dpage->node); + for (i = 0, ldesc = dpage->ldesc; + i < ARRAY_SIZE(dpage->ldesc); + i++, ldesc++) + dma_unmap_single(dchan->device->dev, ldesc->hwdesc_dma_addr, + sizeof(*ldesc->hwdesc), DMA_TO_DEVICE); + free_page((unsigned long)dpage); + } +} + +static int nbpf_slave_caps(struct dma_chan *dchan, + struct dma_slave_caps *caps) +{ + caps->src_addr_widths = NBPF_DMA_BUSWIDTHS; + caps->dstn_addr_widths = NBPF_DMA_BUSWIDTHS; + caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + caps->cmd_pause = false; + caps->cmd_terminate = true; + + return 0; +} + +static struct dma_chan *nbpf_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct 
nbpf_device *nbpf = ofdma->of_dma_data; + struct dma_chan *dchan; + struct nbpf_channel *chan; + + if (dma_spec->args_count != 2) + return NULL; + + dchan = dma_get_any_slave_channel(&nbpf->dma_dev); + if (!dchan) + return NULL; + + dev_dbg(dchan->device->dev, "Entry %s(%s)\n", __func__, + dma_spec->np->name); + + chan = nbpf_to_chan(dchan); + + chan->terminal = dma_spec->args[0]; + chan->flags = dma_spec->args[1]; + + nbpf_chan_prepare(chan); + nbpf_chan_configure(chan); + + return dchan; +} + +static irqreturn_t nbpf_chan_irqt(int irq, void *dev) +{ + struct nbpf_channel *chan = dev; + struct nbpf_desc *desc, *tmp; + dma_async_tx_callback callback; + void *param; + + while (!list_empty(&chan->done)) { + bool found = false, must_put, recycling = false; + + spin_lock_irq(&chan->lock); + + list_for_each_entry_safe(desc, tmp, &chan->done, node) { + if (!desc->user_wait) { + /* Newly completed descriptor, have to process */ + found = true; + break; + } else if (async_tx_test_ack(&desc->async_tx)) { + /* + * This descriptor was waiting for a user ACK, + * it can be recycled now. 
+				 */
+				list_del(&desc->node);
+				spin_unlock_irq(&chan->lock);
+				nbpf_desc_put(desc);
+				recycling = true;
+				break;
+			}
+		}
+
+		if (recycling)
+			continue;
+
+		if (!found) {
+			/* This can happen if TERMINATE_ALL has been called */
+			spin_unlock_irq(&chan->lock);
+			break;
+		}
+
+		dma_cookie_complete(&desc->async_tx);
+
+		/*
+		 * With released lock we cannot dereference desc, maybe it's
+		 * still on the "done" list
+		 */
+		if (async_tx_test_ack(&desc->async_tx)) {
+			list_del(&desc->node);
+			must_put = true;
+		} else {
+			desc->user_wait = true;
+			must_put = false;
+		}
+
+		callback = desc->async_tx.callback;
+		param = desc->async_tx.callback_param;
+
+		/* ack and callback completed descriptor */
+		spin_unlock_irq(&chan->lock);
+
+		if (callback)
+			callback(param);
+
+		if (must_put)
+			nbpf_desc_put(desc);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Hard-IRQ half of the per-channel handler (registered together with
+ * nbpf_chan_irqt() by nbpf_chan_probe()).
+ *
+ * Returns IRQ_NONE when nbpf_status_get() does not report this channel, or
+ * when no descriptor is marked as running. Otherwise the status is
+ * acknowledged, the running descriptor is moved to the "done" list, the first
+ * descriptor on the "active" list (if any) is started, and IRQ_WAKE_THREAD is
+ * returned so that the threaded handler processes the "done" list.
+ */
+static irqreturn_t nbpf_chan_irq(int irq, void *dev)
+{
+	struct nbpf_channel *chan = dev;
+	bool done = nbpf_status_get(chan);
+	struct nbpf_desc *desc;
+	irqreturn_t ret;
+
+	if (!done)
+		return IRQ_NONE;
+
+	nbpf_status_ack(chan);
+
+	dev_dbg(&chan->dma_chan.dev->device, "%s()\n", __func__);
+
+	spin_lock(&chan->lock);
+	desc = chan->running;
+	if (WARN_ON(!desc)) {
+		ret = IRQ_NONE;
+		goto unlock;
+	} else {
+		ret = IRQ_WAKE_THREAD;
+	}
+
+	list_move_tail(&desc->node, &chan->done);
+	chan->running = NULL;
+
+	if (!list_empty(&chan->active)) {
+		desc = list_first_entry(&chan->active,
+					struct nbpf_desc, node);
+		/* Only remember the descriptor if it was actually started */
+		if (!nbpf_start(desc))
+			chan->running = desc;
+	}
+
+unlock:
+	spin_unlock(&chan->lock);
+
+	return ret;
+}
+
+/*
+ * Error interrupt handler for the whole controller.
+ *
+ * Returns IRQ_NONE when nbpf_error_get() reports no error. Otherwise, for
+ * every channel reported by nbpf_error_get_channel(), the error is cleared
+ * and the channel is idled, until no error remains.
+ *
+ * NOTE(review): the warning below is printed before the "no error" check, so
+ * it is also emitted when the handler is invoked without a pending error
+ * (the IRQ is requested with IRQF_SHARED).
+ */
+static irqreturn_t nbpf_err_irq(int irq, void *dev)
+{
+	struct nbpf_device *nbpf = dev;
+	u32 error = nbpf_error_get(nbpf);
+
+	dev_warn(nbpf->dma_dev.dev, "DMA error IRQ %u\n", irq);
+
+	if (!error)
+		return IRQ_NONE;
+
+	do {
+		struct nbpf_channel *chan = nbpf_error_get_channel(nbpf, error);
+		/* On error: abort all queued transfers, no callback */
+		nbpf_error_clear(chan);
+		nbpf_chan_idle(chan);
+		error = nbpf_error_get(nbpf);
+	} while (error);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Initialise channel @n of @nbpf: compute its register base, set up its lock,
+ * descriptor page list and cookie, request its (shared, threaded) interrupt
+ * and add it to the DMA device's channel list.
+ *
+ * chan->irq must already have been filled in by the caller.
+ *
+ * Returns 0 on success or the negative error code from
+ * devm_request_threaded_irq().
+ */
+static int nbpf_chan_probe(struct nbpf_device *nbpf, int n)
+{
+	struct dma_device *dma_dev = &nbpf->dma_dev;
+	struct nbpf_channel *chan = nbpf->chan + n;
+	int ret;
+
+	chan->nbpf = nbpf;
+	chan->base = nbpf->base + NBPF_REG_CHAN_OFFSET + NBPF_REG_CHAN_SIZE * n;
+	INIT_LIST_HEAD(&chan->desc_page);
+	spin_lock_init(&chan->lock);
+	chan->dma_chan.device = dma_dev;
+	dma_cookie_init(&chan->dma_chan);
+	nbpf_chan_prepare_default(chan);
+
+	dev_dbg(dma_dev->dev, "%s(): channel %d: -> %p\n", __func__, n, chan->base);
+
+	snprintf(chan->name, sizeof(chan->name), "nbpf %d", n);
+
+	ret = devm_request_threaded_irq(dma_dev->dev, chan->irq,
+			nbpf_chan_irq, nbpf_chan_irqt, IRQF_SHARED,
+			chan->name, chan);
+	if (ret < 0)
+		return ret;
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&chan->dma_chan.device_node,
+		      &dma_dev->channels);
+
+	return 0;
+}
+
+/* DT match table; .data points at the matching nbpf_cfg[] entry */
+static const struct of_device_id nbpf_match[] = {
+	{.compatible = "renesas,nbpfaxi64dmac1b4",	.data = &nbpf_cfg[NBPF1B4]},
+	{.compatible = "renesas,nbpfaxi64dmac1b8",	.data = &nbpf_cfg[NBPF1B8]},
+	{.compatible = "renesas,nbpfaxi64dmac1b16",	.data = &nbpf_cfg[NBPF1B16]},
+	{.compatible = "renesas,nbpfaxi64dmac4b4",	.data = &nbpf_cfg[NBPF4B4]},
+	{.compatible = "renesas,nbpfaxi64dmac4b8",	.data = &nbpf_cfg[NBPF4B8]},
+	{.compatible = "renesas,nbpfaxi64dmac4b16",	.data = &nbpf_cfg[NBPF4B16]},
+	{.compatible = "renesas,nbpfaxi64dmac8b4",	.data = &nbpf_cfg[NBPF8B4]},
+	{.compatible = "renesas,nbpfaxi64dmac8b8",	.data = &nbpf_cfg[NBPF8B8]},
+	{.compatible = "renesas,nbpfaxi64dmac8b16",	.data = &nbpf_cfg[NBPF8B16]},
+	{}
+};
+MODULE_DEVICE_TABLE(of, nbpf_match);
+
+/*
+ * Platform driver probe: map the registers, get the clock, sort out the IRQ
+ * layout, set up every channel and register with the dmaengine core and the
+ * OF DMA helpers.
+ *
+ * Returns 0 on success, or a negative error code:
+ *   -ENODEV  no DT node, no match, or no configuration data
+ *   -ENOMEM  allocation of the device structure failed
+ *   -ENXIO   the number of IRQs matches none of the supported schemes
+ *   -EINVAL  per-channel scheme, but the IRQ list does not consist of the
+ *            error IRQ plus exactly one IRQ per channel
+ *   or whatever the ioremap / clk / IRQ / registration helpers return.
+ */
+static int nbpf_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *of_id = of_match_device(nbpf_match, dev);
+	struct device_node *np = dev->of_node;
+	struct nbpf_device *nbpf;
+	struct dma_device *dma_dev;
+	struct resource *iomem, *irq_res;
+	const struct nbpf_config *cfg;
+	int num_channels;
+	int ret, irq, eirq, i;
+	int irqbuf[9] /* maximum 8 channels + error IRQ */;
+	unsigned int irqs = 0;
+
+	BUILD_BUG_ON(sizeof(struct nbpf_desc_page) > PAGE_SIZE);
+
+	/* DT only */
+	if (!np || !of_id || !of_id->data)
+		return -ENODEV;
+
+	cfg = of_id->data;
+	num_channels = cfg->num_channels;
+
+	/* nbpf->chan[] has exactly num_channels entries: 0 .. num_channels - 1 */
+	nbpf = devm_kzalloc(dev, sizeof(*nbpf) + num_channels *
+			    sizeof(nbpf->chan[0]), GFP_KERNEL);
+	if (!nbpf) {
+		dev_err(dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+	dma_dev = &nbpf->dma_dev;
+	dma_dev->dev = dev;
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nbpf->base = devm_ioremap_resource(dev, iomem);
+	if (IS_ERR(nbpf->base))
+		return PTR_ERR(nbpf->base);
+
+	nbpf->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(nbpf->clk))
+		return PTR_ERR(nbpf->clk);
+
+	nbpf->config = cfg;
+
+	/* Flatten all IRQ resources (each may be a range) into irqbuf[] */
+	for (i = 0; irqs < ARRAY_SIZE(irqbuf); i++) {
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res)
+			break;
+
+		for (irq = irq_res->start; irq <= irq_res->end;
+		     irq++, irqs++) {
+			/*
+			 * A range must not run past the end of irqbuf[]. More
+			 * IRQs than that cannot match any supported scheme, so
+			 * fail the same way the scheme check below would.
+			 */
+			if (irqs >= ARRAY_SIZE(irqbuf))
+				return -ENXIO;
+			irqbuf[irqs] = irq;
+		}
+	}
+
+	/*
+	 * 3 IRQ resource schemes are supported:
+	 * 1. 1 shared IRQ for error and all channels
+	 * 2. 2 IRQs: one for error and one shared for all channels
+	 * 3. 1 IRQ for error and an own IRQ for each channel
+	 */
+	if (irqs != 1 && irqs != 2 && irqs != num_channels + 1)
+		return -ENXIO;
+
+	if (irqs == 1) {
+		eirq = irqbuf[0];
+
+		for (i = 0; i < num_channels; i++)
+			nbpf->chan[i].irq = irqbuf[0];
+	} else {
+		eirq = platform_get_irq_byname(pdev, "error");
+		if (eirq < 0)
+			return eirq;
+
+		if (irqs == num_channels + 1) {
+			/* Index of the next channel to receive an IRQ */
+			int n = 0;
+
+			for (i = 0; i < (int)irqs; i++) {
+				/* Skip the error IRQ */
+				if (irqbuf[i] == eirq)
+					continue;
+				/* Error IRQ not in the list: one IRQ too many */
+				if (n >= num_channels)
+					return -EINVAL;
+				nbpf->chan[n++].irq = irqbuf[i];
+			}
+
+			/* Every channel must have got its own IRQ */
+			if (n != num_channels)
+				return -EINVAL;
+		} else {
+			/* 2 IRQs and more than one channel */
+			if (irqbuf[0] == eirq)
+				irq = irqbuf[1];
+			else
+				irq = irqbuf[0];
+
+			for (i = 0; i < num_channels; i++)
+				nbpf->chan[i].irq = irq;
+		}
+	}
+
+	ret = devm_request_irq(dev, eirq, nbpf_err_irq,
+			       IRQF_SHARED, "dma error", nbpf);
+	if (ret < 0)
+		return ret;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* Create DMA Channel */
+	for (i = 0; i < num_channels; i++) {
+		ret = nbpf_chan_probe(nbpf, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+	dma_cap_set(DMA_SG, dma_dev->cap_mask);
+
+	/* Common and MEMCPY operations */
+	dma_dev->device_alloc_chan_resources
+		= nbpf_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = nbpf_free_chan_resources;
+	dma_dev->device_prep_dma_sg = nbpf_prep_memcpy_sg;
+	dma_dev->device_prep_dma_memcpy = nbpf_prep_memcpy;
+	dma_dev->device_tx_status = nbpf_tx_status;
+	dma_dev->device_issue_pending = nbpf_issue_pending;
+	dma_dev->device_slave_caps = nbpf_slave_caps;
+
+	/*
+	 * If we drop support for unaligned MEMCPY buffer addresses and / or
+	 * lengths by setting
+	 * dma_dev->copy_align = 4;
+	 * then we can set transfer length to 4 bytes in nbpf_prep_one() for
+	 * DMA_MEM_TO_MEM
+	 */
+
+	/* Compulsory for DMA_SLAVE fields */
+	dma_dev->device_prep_slave_sg = nbpf_prep_slave_sg;
+	dma_dev->device_control = nbpf_control;
+
+	platform_set_drvdata(pdev, nbpf);
+
+	ret = clk_prepare_enable(nbpf->clk);
+	if (ret < 0)
+		return ret;
+
+	nbpf_configure(nbpf);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret < 0)
+		goto e_clk_off;
+
+	ret = of_dma_controller_register(np, nbpf_of_xlate, nbpf);
+	if (ret < 0)
+		goto e_dma_dev_unreg;
+
+	return 0;
+
+e_dma_dev_unreg:
+	dma_async_device_unregister(dma_dev);
+e_clk_off:
+	clk_disable_unprepare(nbpf->clk);
+
+	return ret;
+}
+
+/*
+ * Platform driver remove: undo the registrations made at the end of
+ * nbpf_probe(), in reverse order, and switch the clock off.
+ */
+static int nbpf_remove(struct platform_device *pdev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&nbpf->dma_dev);
+	clk_disable_unprepare(nbpf->clk);
+
+	return 0;
+}
+
+/* Platform-bus ID table; driver_data points at the matching nbpf_cfg[] entry */
+static struct platform_device_id nbpf_ids[] = {
+	{"nbpfaxi64dmac1b4",	(kernel_ulong_t)&nbpf_cfg[NBPF1B4]},
+	{"nbpfaxi64dmac1b8",	(kernel_ulong_t)&nbpf_cfg[NBPF1B8]},
+	{"nbpfaxi64dmac1b16",	(kernel_ulong_t)&nbpf_cfg[NBPF1B16]},
+	{"nbpfaxi64dmac4b4",	(kernel_ulong_t)&nbpf_cfg[NBPF4B4]},
+	{"nbpfaxi64dmac4b8",	(kernel_ulong_t)&nbpf_cfg[NBPF4B8]},
+	{"nbpfaxi64dmac4b16",	(kernel_ulong_t)&nbpf_cfg[NBPF4B16]},
+	{"nbpfaxi64dmac8b4",	(kernel_ulong_t)&nbpf_cfg[NBPF8B4]},
+	{"nbpfaxi64dmac8b8",	(kernel_ulong_t)&nbpf_cfg[NBPF8B8]},
+	{"nbpfaxi64dmac8b16",	(kernel_ulong_t)&nbpf_cfg[NBPF8B16]},
+	{},
+};
+MODULE_DEVICE_TABLE(platform, nbpf_ids);
+
+#ifdef CONFIG_PM_RUNTIME
+/* Runtime suspend: gate the controller clock */
+static int nbpf_runtime_suspend(struct device *dev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev));
+	clk_disable_unprepare(nbpf->clk);
+	return 0;
+}
+
+/* Runtime resume: re-enable the controller clock, propagating any failure */
+static int nbpf_runtime_resume(struct device *dev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev));
+	return clk_prepare_enable(nbpf->clk);
+}
+#endif
+
+static const struct dev_pm_ops nbpf_pm_ops = {
+	SET_RUNTIME_PM_OPS(nbpf_runtime_suspend, nbpf_runtime_resume, NULL)
+};
+
+static struct platform_driver nbpf_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "dma-nbpf",
+		.of_match_table = nbpf_match,
+		.pm = &nbpf_pm_ops,
+	},
+	.id_table = nbpf_ids,
+	.probe = nbpf_probe,
+	.remove = nbpf_remove,
+};
+
+module_platform_driver(nbpf_driver);
+
+MODULE_AUTHOR("Guennadi Liakhovetski ");
+MODULE_DESCRIPTION("dmaengine driver for NBPFAXI64* DMACs");
+MODULE_LICENSE("GPL v2");
-- cgit v1.2.3-70-g09d2