diff options
-rw-r--r-- | drivers/infiniband/hw/ipath/Makefile | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_common.h | 16 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_driver.c | 150 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_file_ops.c | 97 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_init_chip.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_intr.c | 239 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_kernel.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_qp.c | 14 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_ruc.c | 18 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_sdma.c | 91 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_stats.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_ud.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_verbs.c | 391 |
13 files changed, 896 insertions, 136 deletions
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index fe673882686..75a6c91944c 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -20,17 +20,20 @@ ib_ipath-y := \ ipath_qp.o \ ipath_rc.o \ ipath_ruc.o \ + ipath_sdma.o \ ipath_srq.o \ ipath_stats.o \ ipath_sysfs.o \ ipath_uc.o \ ipath_ud.o \ ipath_user_pages.o \ + ipath_user_sdma.o \ ipath_verbs_mcast.o \ ipath_verbs.o ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o +ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba7220.o ipath_sd7220.o ipath_sd7220_img.o ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 02fd310d1ef..2cf7cd2cb66 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -447,8 +447,9 @@ struct ipath_user_info { #define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ #define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ #define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ - -#define IPATH_CMD_MAX 29 +/* 30 is unused */ +#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ +#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ /* * Poll types @@ -486,6 +487,17 @@ struct ipath_cmd { union { struct ipath_tid_info tid_info; struct ipath_user_info user_info; + + /* + * address in userspace where we should put the sdma + * inflight counter + */ + __u64 sdma_inflight; + /* + * address in userspace where we should put the sdma + * completion counter + */ + __u64 sdma_complete; /* address in userspace of struct ipath_port_info to write result to */ __u64 port_info; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index c94bc4730b2..ed7bdc93b9e 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -129,8 +129,10 @@ static int __devinit ipath_init_one(struct pci_dev *, /* Only needed for registration, nothing else needs this info */ #define PCI_VENDOR_ID_PATHSCALE 0x1fc1 +#define PCI_VENDOR_ID_QLOGIC 0x1077 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 +#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220 /* Number of seconds before our card status check... */ #define STATUS_TIMEOUT 60 @@ -138,6 +140,7 @@ static int __devinit ipath_init_one(struct pci_dev *, static const struct pci_device_id ipath_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) }, { 0, } }; @@ -532,6 +535,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, "CONFIG_PCI_MSI is not enabled\n", ent->device); return -ENODEV; #endif + case PCI_DEVICE_ID_INFINIPATH_7220: +#ifndef CONFIG_PCI_MSI + ipath_dbg("CONFIG_PCI_MSI is not enabled, " + "using IntX for unit %u\n", dd->ipath_unit); +#endif + ipath_init_iba7220_funcs(dd); + break; default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " "failing\n", ent->device); @@ -887,13 +897,47 @@ int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; } +static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, + char *buf, size_t blen) +{ + static const struct { + ipath_err_t err; + const char *msg; + } errs[] = { + { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, + { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, + { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, + { INFINIPATH_E_SDMABASE, "SDmaBase" }, + { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, + { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, + { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, + { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, + { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, + { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, + { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, + { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, + }; + int i; + int expected; + size_t bidx = 0; + + for (i = 0; i < ARRAY_SIZE(errs); i++) { + expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 : + test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + if ((err & errs[i].err) && !expected) + bidx += snprintf(buf + bidx, blen - bidx, + "%s ", errs[i].msg); + } +} + /* * Decode the error status into strings, deciding whether to always * print * it or not depending on "normal packet errors" vs everything * else. Return 1 if "real" errors, otherwise 0 if only packet * errors, so caller can decide what to print with the string. */ -int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) +int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, + ipath_err_t err) { int iserr = 1; *buf = '\0'; @@ -975,6 +1019,8 @@ int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "hardware ", blen); if (err & INFINIPATH_E_RESET) strlcat(buf, "reset ", blen); + if (err & INFINIPATH_E_SDMAERRS) + decode_sdma_errs(dd, err, buf, blen); if (err & INFINIPATH_E_INVALIDEEPCMD) strlcat(buf, "invalideepromcmd ", blen); done: @@ -1730,30 +1776,80 @@ bail: */ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) { + unsigned long flags; + if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); goto bail; } + /* + * If we have SDMA, and it's not disabled, we have to kick off the + * abort state machine, provided we aren't already aborting. + * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), + * we skip the rest of this routine. It is already "in progress" + */ + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { + int skip_cancel; + u64 *statp = &dd->ipath_sdma_status; + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + skip_cancel = + !test_bit(IPATH_SDMA_DISABLED, statp) && + test_and_set_bit(IPATH_SDMA_ABORTING, statp); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (skip_cancel) + goto bail; + } + ipath_dbg("Cancelling all in-progress send buffers\n"); /* skip armlaunch errs for a while */ dd->ipath_lastcancel = jiffies + HZ / 2; /* - * the abort bit is auto-clearing. We read scratch to be sure - * that cancels and the abort have taken effect in the chip. + * The abort bit is auto-clearing. We also don't want pioavail + * update happening during this, and we don't want any other + * sends going out, so turn those off for the duration. We read + * the scratch register to be sure that cancels and the abort + * have taken effect in the chip. Otherwise two parts are same + * as ipath_force_pio_avail_update() */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD + | INFINIPATH_S_PIOENABLE); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); + dd->ipath_sendctrl | INFINIPATH_S_ABORT); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + + /* disarm all send buffers */ ipath_disarm_piobufs(dd, 0, - (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k)); - if (restore_sendctrl) /* else done by caller later */ + dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + + if (restore_sendctrl) { + /* else done by caller later if needed */ + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD | + INFINIPATH_S_PIOENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); + dd->ipath_sendctrl); + /* and again, be sure all have hit the chip */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } - /* and again, be sure all have hit the chip */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) && + !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && + test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + /* only wait so long for intr */ + dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; + dd->ipath_sdma_reset_wait = 200; + __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + } bail:; } @@ -1952,7 +2048,7 @@ bail: * sanity checking on this, and we don't deal with what happens to * programs that are already running when the size changes. * NOTE: changing the MTU will usually cause the IBC to go back to - * link initialize (IPATH_IBSTATE_INIT) state... + * link INIT state... */ int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) { @@ -2092,9 +2188,8 @@ static void ipath_run_led_override(unsigned long opaque) * but leave that to per-chip functions. */ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & - dd->ibcs_lts_mask; - lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK; + ltstate = ipath_ib_linktrstate(dd, val); + lstate = ipath_ib_linkstate(dd, val); dd->ipath_f_setextled(dd, lstate, ltstate); mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); @@ -2170,6 +2265,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + /* * gracefully stop all sends allowing any in progress to trickle out * first. @@ -2187,9 +2285,16 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ udelay(5); + dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */ + ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE); ipath_cancel_sends(dd, 0); + /* + * we are shutting down, so tell components that care. We don't do + * this on just a link state change, much like ethernet, a cable + * unplug, etc. doesn't change driver state + */ signal_ib_event(dd, IB_EVENT_PORT_ERR); /* disable IBC */ @@ -2214,6 +2319,10 @@ void ipath_shutdown_device(struct ipath_devdata *dd) del_timer_sync(&dd->ipath_intrchk_timer); dd->ipath_intrchk_timer.data = 0; } + if (atomic_read(&dd->ipath_led_override_timer_active)) { + del_timer_sync(&dd->ipath_led_override_timer); + atomic_set(&dd->ipath_led_override_timer_active, 0); + } /* * clear all interrupts and errors, so that the next time the driver @@ -2408,13 +2517,18 @@ int ipath_reset_device(int unit) } } + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + teardown_sdma(dd); + dd->ipath_flags &= ~IPATH_INITTED; + ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); ret = dd->ipath_f_reset(dd); - if (ret != 1) - ipath_dbg("reset was not successful\n"); - ipath_dbg("Trying to reinitialize unit %u after reset attempt\n", - unit); - ret = ipath_init_chip(dd, 1); + if (ret == 1) { + ipath_dbg("Reinitializing unit %u after reset attempt\n", + unit); + ret = ipath_init_chip(dd, 1); + } else + ret = -EAGAIN; if (ret) ipath_dev_err(dd, "Reinitialize unit %u after " "reset failed with %d\n", unit, ret); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index b87d3126e4d..d38ba293323 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -36,21 +36,28 @@ #include <linux/cdev.h> #include <linux/swap.h> #include <linux/vmalloc.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <linux/jiffies.h> #include <asm/pgtable.h> #include "ipath_kernel.h" #include "ipath_common.h" +#include "ipath_user_sdma.h" static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, loff_t *); +static ssize_t ipath_writev(struct kiocb *, const struct iovec *, + unsigned long , loff_t); static unsigned int ipath_poll(struct file *, struct poll_table_struct *); static int ipath_mmap(struct file *, struct vm_area_struct *); static const struct file_operations ipath_file_ops = { .owner = THIS_MODULE, .write = ipath_write, + .aio_write = ipath_writev, .open = ipath_open, .release = ipath_close, .poll = ipath_poll, @@ -1870,10 +1877,9 @@ static int ipath_assign_port(struct file *fp, if (ipath_compatible_subports(swmajor, swminor) && uinfo->spu_subport_cnt && (ret = find_shared_port(fp, uinfo))) { - mutex_unlock(&ipath_mutex); if (ret > 0) ret = 0; - goto done; + goto done_chk_sdma; } i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; @@ -1885,6 +1891,21 @@ static int ipath_assign_port(struct file *fp, else ret = find_best_unit(fp, uinfo); +done_chk_sdma: + if (!ret) { + struct ipath_filedata *fd = fp->private_data; + const struct ipath_portdata *pd = fd->pd; + const struct ipath_devdata *dd = pd->port_dd; + + fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev, + dd->ipath_unit, + pd->port_port, + fd->subport); + + if (!fd->pq) + ret = -ENOMEM; + } + mutex_unlock(&ipath_mutex); done: @@ -2042,6 +2063,13 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_unlock(&ipath_mutex); goto bail; } + + dd = pd->port_dd; + + /* drain user sdma queue */ + ipath_user_sdma_queue_drain(dd, fd->pq); + ipath_user_sdma_queue_destroy(fd->pq); + if (--pd->port_cnt) { /* * XXX If the master closes the port before the slave(s), @@ -2054,7 +2082,6 @@ static int ipath_close(struct inode *in, struct file *fp) goto bail; } port = pd->port_port; - dd = pd->port_dd; if (pd->port_hdrqfull) { ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " @@ -2176,6 +2203,35 @@ static int ipath_get_slave_info(struct ipath_portdata *pd, return ret; } +static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq, + u32 __user *inflightp) +{ + const u32 val = ipath_user_sdma_inflight_counter(pq); + + if (put_user(val, inflightp)) + return -EFAULT; + + return 0; +} + +static int ipath_sdma_get_complete(struct ipath_devdata *dd, + struct ipath_user_sdma_queue *pq, + u32 __user *completep) +{ + u32 val; + int err; + + err = ipath_user_sdma_make_progress(dd, pq); + if (err < 0) + return err; + + val = ipath_user_sdma_complete_counter(pq); + if (put_user(val, completep)) + return -EFAULT; + + return 0; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -2250,6 +2306,16 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.armlaunch_ctrl; src = &ucmd->cmd.armlaunch_ctrl; break; + case IPATH_CMD_SDMA_INFLIGHT: + copy = sizeof(cmd.cmd.sdma_inflight); + dest = &cmd.cmd.sdma_inflight; + src = &ucmd->cmd.sdma_inflight; + break; + case IPATH_CMD_SDMA_COMPLETE: + copy = sizeof(cmd.cmd.sdma_complete); + dest = &cmd.cmd.sdma_complete; + src = &ucmd->cmd.sdma_complete; + break; default: ret = -EINVAL; goto bail; @@ -2331,6 +2397,17 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, else ipath_disable_armlaunch(pd->port_dd); break; + case IPATH_CMD_SDMA_INFLIGHT: + ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_inflight); + break; + case IPATH_CMD_SDMA_COMPLETE: + ret = ipath_sdma_get_complete(pd->port_dd, + user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_complete); + break; } if (ret >= 0) @@ -2340,6 +2417,20 @@ bail: return ret; } +static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov, + unsigned long dim, loff_t off) +{ + struct file *filp = iocb->ki_filp; + struct ipath_filedata *fp = filp->private_data; + struct ipath_portdata *pd = port_fp(filp); + struct ipath_user_sdma_queue *pq = fp->pq; + + if (!dim) + return -EINVAL; + + return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim); +} + static struct class *ipath_class; static int init_cdev(int minor, char *name, const struct file_operations *fops, diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index c012e05649b..b43c2a10029 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -980,6 +980,10 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_stats_timer_active = 1; } + /* Set up SendDMA if chip supports it */ + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ret = setup_sdma(dd); + /* Set up HoL state */ init_timer(&dd->ipath_hol_timer); dd->ipath_hol_timer.function = ipath_hol_event; diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 90b972f6a84..d0088d5b9a4 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -433,6 +433,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN | IPATH_LINKARMED | IPATH_NOCABLE); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ipath_restart_sdma(dd); signal_ib_event(dd, IB_EVENT_PORT_ACTIVE); /* LED active not handled in chip _f_updown */ dd->ipath_f_setextled(dd, lstate, ltstate); @@ -480,7 +482,7 @@ done: } static void handle_supp_msgs(struct ipath_devdata *dd, - unsigned supp_msgs, char *msg, int msgsz) + unsigned supp_msgs, char *msg, u32 msgsz) { /* * Print the message unless it's ibc status change only, which @@ -488,12 +490,19 @@ static void handle_supp_msgs(struct ipath_devdata *dd, */ if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { int iserr; - iserr = ipath_decode_err(msg, msgsz, + ipath_err_t mask; + iserr = ipath_decode_err(dd, msg, msgsz, dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED); - if (dd->ipath_lasterror & - ~(INFINIPATH_E_RRCVEGRFULL | - INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) + + mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED; + + /* if we're in debug, then don't mask SDMADISABLED msgs */ + if (ipath_debug & __IPATH_DBG) + mask &= ~INFINIPATH_E_SDMADISABLED; + + if (dd->ipath_lasterror & ~mask) ipath_dev_err(dd, "Suppressed %u messages for " "fast-repeating errors (%s) (%llx)\n", supp_msgs, msg, @@ -520,7 +529,7 @@ static void handle_supp_msgs(struct ipath_devdata *dd, static unsigned handle_frequent_errors(struct ipath_devdata *dd, ipath_err_t errs, char *msg, - int msgsz, int *noprint) + u32 msgsz, int *noprint) { unsigned long nc; static unsigned long nextmsg_time; @@ -550,19 +559,125 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd, return supp_msgs; } +static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs) +{ + unsigned long flags; + int expected; + + if (ipath_debug & __IPATH_DBG) { + char msg[128]; + ipath_decode_err(dd, msg, sizeof msg, errs & + INFINIPATH_E_SDMAERRS); + ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg); + } + if (ipath_debug & __IPATH_VERBDBG) { + unsigned long tl, hd, status, lengen; + tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail); + hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead); + status = ipath_read_kreg64(dd + , dd->ipath_kregs->kr_senddmastatus); + lengen = ipath_read_kreg64(dd, + dd->ipath_kregs->kr_senddmalengen); + ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx " + "lengen 0x%lx\n", tl, hd, status, lengen); + } + + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!expected) + ipath_cancel_sends(dd, 1); +} + +static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat) +{ + unsigned long flags; + int expected; + + if ((istat & INFINIPATH_I_SDMAINT) && + !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + ipath_sdma_intr(dd); + + if (istat & INFINIPATH_I_SDMADISABLED) { + expected = test_bit(IPATH_SDMA_ABORTING, + &dd->ipath_sdma_status); + ipath_dbg("%s SDmaDisabled intr\n", + expected ? "expected" : "unexpected"); + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!expected) + ipath_cancel_sends(dd, 1); + if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + tasklet_hi_schedule(&dd->ipath_sdma_abort_task); + } +} + +static int handle_hdrq_full(struct ipath_devdata *dd) +{ + int chkerrpkts = 0; + u32 hd, tl; + u32 i; + + ipath_stats.sps_hdrqfull++; + for (i = 0; i < dd->ipath_cfgports; i++) { + struct ipath_portdata *pd = dd->ipath_pd[i]; + + if (i == 0) { + /* + * For kernel receive queues, we just want to know + * if there are packets in the queue that we can + * process. + */ + if (pd->port_head != ipath_get_hdrqtail(pd)) + chkerrpkts |= 1 << i; + continue; + } + + /* Skip if user context is not open */ + if (!pd || !pd->port_cnt) + continue; + + /* Don't report the same point multiple times. */ + if (dd->ipath_flags & IPATH_NODMA_RTAIL) + tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i); + else + tl = ipath_get_rcvhdrtail(pd); + if (tl == pd->port_lastrcvhdrqtail) + continue; + + hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i); + if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) { + pd->port_lastrcvhdrqtail = tl; + pd->port_hdrqfull++; + /* flush hdrqfull so that poll() sees it */ + wmb(); + wake_up_interruptible(&pd->port_wait); + } + } + + return chkerrpkts; +} + static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) { char msg[128]; u64 ignore_this_time = 0; - int i, iserr = 0; + u64 iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; int log_idx; - supp_msgs = handle_frequent_errors(dd, errs, msg, sizeof msg, &noprint); + /* + * don't report errors that are masked, either at init + * (not set in ipath_errormask), or temporarily (set in + * ipath_maskederrs) + */ + errs &= dd->ipath_errormask & ~dd->ipath_maskederrs; - /* don't report errors that are masked */ - errs &= ~dd->ipath_maskederrs; + supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg, + &noprint); /* do these first, they are most important */ if (errs & INFINIPATH_E_HARDWARE) { @@ -577,6 +692,9 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) } } + if (errs & INFINIPATH_E_SDMAERRS) + handle_sdma_errors(dd, errs); + if (!noprint && (errs & ~dd->ipath_e_bitsextant)) ipath_dev_err(dd, "error interrupt with unknown errors " "%llx set\n", (unsigned long long) @@ -611,7 +729,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) dd->ipath_errormask &= ~dd->ipath_maskederrs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, dd->ipath_errormask); - s_iserr = ipath_decode_err(msg, sizeof msg, + s_iserr = ipath_decode_err(dd, msg, sizeof msg, dd->ipath_maskederrs); if (dd->ipath_maskederrs & @@ -661,26 +779,43 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) INFINIPATH_E_IBSTATUSCHANGED); } - /* likely due to cancel, so suppress */ + if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) { + dd->ipath_spectriggerhit++; + ipath_dbg("%lu special trigger hits\n", + dd->ipath_spectriggerhit); + } + + /* likely due to cancel; so suppress message unless verbose */ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && dd->ipath_lastcancel > jiffies) { - ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n"); + /* armlaunch takes precedence; it often causes both. */ + ipath_cdbg(VERBOSE, + "Suppressed %s error (%llx) after sendbuf cancel\n", + (errs & INFINIPATH_E_SPIOARMLAUNCH) ? + "armlaunch" : "sendpktlen", (unsigned long long)errs); errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN); } if (!errs) return 0; - if (!noprint) + if (!noprint) { + ipath_err_t mask; /* - * the ones we mask off are handled specially below or above + * The ones we mask off are handled specially below + * or above. Also mask SDMADISABLED by default as it + * is too chatty. */ - ipath_decode_err(msg, sizeof msg, - errs & ~(INFINIPATH_E_IBSTATUSCHANGED | - INFINIPATH_E_RRCVEGRFULL | - INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_HARDWARE)); - else + mask = INFINIPATH_E_IBSTATUSCHANGED | + INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED; + + /* if we're in debug, then don't mask SDMADISABLED msgs */ + if (ipath_debug & __IPATH_DBG) + mask &= ~INFINIPATH_E_SDMADISABLED; + + ipath_decode_err(dd, msg, sizeof msg, errs & ~mask); + } else /* so we don't need if (!noprint) at strlcat's below */ *msg = 0; @@ -705,39 +840,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * fast_stats, no more than every 5 seconds, user ports get printed * on close */ - if (errs & INFINIPATH_E_RRCVHDRFULL) { - u32 hd, tl; - ipath_stats.sps_hdrqfull++; - for (i = 0; i < dd->ipath_cfgports; i++) { - struct ipath_portdata *pd = dd->ipath_pd[i]; - if (i == 0) { - hd = pd->port_head; - tl = ipath_get_hdrqtail(pd); - } else if (pd && pd->port_cnt && - pd->port_rcvhdrtail_kvaddr) { - /* - * don't report same point multiple times, - * except kernel - */ - tl = *(u64 *) pd->port_rcvhdrtail_kvaddr; - if (tl == pd->port_lastrcvhdrqtail) - continue; - hd = ipath_read_ureg32(dd, ur_rcvhdrhead, - i); - } else - continue; - if (hd == (tl + 1) || - (!hd && tl == dd->ipath_hdrqlast)) { - if (i == 0) - chkerrpkts = 1; - pd->port_lastrcvhdrqtail = tl; - pd->port_hdrqfull++; - /* flush hdrqfull so that poll() sees it */ - wmb(); - wake_up_interruptible(&pd->port_wait); - } - } - } + if (errs & INFINIPATH_E_RRCVHDRFULL) + chkerrpkts |= handle_hdrq_full(dd); if (errs & INFINIPATH_E_RRCVEGRFULL) { struct ipath_portdata *pd = dd->ipath_pd[0]; @@ -749,7 +853,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) */ ipath_stats.sps_etidfull++; if (pd->port_head != ipath_get_hdrqtail(pd)) - chkerrpkts = 1; + chkerrpkts |= 1; } /* @@ -788,9 +892,6 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) if (!noprint && *msg) { if (iserr) ipath_dev_err(dd, "%s error\n", msg); - else - dev_info(&dd->pcidev->dev, "%s packet problems\n", - msg); } if (dd->ipath_state_wanted & dd->ipath_flags) { ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " @@ -1017,7 +1118,7 @@ static void handle_urcv(struct ipath_devdata *dd, u64 istat) irqreturn_t ipath_intr(int irq, void *data) { struct ipath_devdata *dd = data; - u32 istat, chk0rcv = 0; + u64 istat, chk0rcv = 0; ipath_err_t estat = 0; irqreturn_t ret; static unsigned unexpected = 0; @@ -1070,17 +1171,17 @@ irqreturn_t ipath_intr(int irq, void *data) if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, - "interrupt with unknown interrupts %x set\n", - istat & (u32) ~ dd->ipath_i_bitsextant); - else - ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); + "interrupt with unknown interrupts %Lx set\n", + istat & ~dd->ipath_i_bitsextant); + else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */ + ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat); - if (unlikely(istat & INFINIPATH_I_ERROR)) { + if (istat & INFINIPATH_I_ERROR) { ipath_stats.sps_errints++; estat = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errorstatus); if (!estat) - dev_info(&dd->pcidev->dev, "error interrupt (%x), " + dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " "but no error bits set!\n", istat); else if (estat == -1LL) /* @@ -1198,6 +1299,9 @@ irqreturn_t ipath_intr(int irq, void *data) (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift))) handle_urcv(dd, istat); + if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED)) + handle_sdma_intr(dd, istat); + if (istat & INFINIPATH_I_SPIOBUFAVAIL) { unsigned long flags; @@ -1208,7 +1312,10 @@ irqreturn_t ipath_intr(int irq, void *data) ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - handle_layer_pioavail(dd); + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) + handle_layer_pioavail(dd); + else + ipath_dbg("unexpected BUFAVAIL intr\n"); } ret = IRQ_HANDLED; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 550d46c1aef..a2f036c9c28 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -871,7 +871,8 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); extern int ipath_diag_inuse; irqreturn_t ipath_intr(int irq, void *devid); -int ipath_decode_err(char *buf, size_t blen, ipath_err_t err); +int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, + ipath_err_t err); #if __IPATH_INFO || __IPATH_DBG extern const char *ipath_ibcstatus_str[]; #endif @@ -1026,6 +1027,7 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); /* send dma routines */ int setup_sdma(struct ipath_devdata *); void teardown_sdma(struct ipath_devdata *); +void ipath_restart_sdma(struct ipath_devdata *); void ipath_sdma_intr(struct ipath_devdata *); int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *, u32, struct ipath_verbs_txreq *); diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 812b42c500e..ded970bd13e 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -340,6 +340,7 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; + qp->s_pkt_delay = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -563,8 +564,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_ACCESS_FLAGS) qp->qp_access_flags = attr->qp_access_flags; - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { qp->remote_ah_attr = attr->ah_attr; + qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate); + } if (attr_mask & IB_QP_PATH_MTU) qp->path_mtu = attr->path_mtu; @@ -850,6 +853,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail_qp; } qp->ip = NULL; + qp->s_tx = NULL; ipath_reset_qp(qp, init_attr->qp_type); break; @@ -955,12 +959,20 @@ int ipath_destroy_qp(struct ib_qp *ibqp) /* Stop the sending tasklet. */ tasklet_kill(&qp->s_task); + if (qp->s_tx) { + atomic_dec(&qp->refcount); + if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) + kfree(qp->s_tx->txreq.map_addr); + } + /* Make sure the QP isn't on the timeout list. */ spin_lock_irqsave(&dev->pending_lock, flags); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); if (!list_empty(&qp->piowait)) list_del_init(&qp->piowait); + if (qp->s_tx) + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); spin_unlock_irqrestore(&dev->pending_lock, flags); /* diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index a59bdbd0ed8..bcaa2914e34 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -483,14 +483,16 @@ done: static void want_buffer(struct ipath_devdata *dd) { - unsigned long flags; - - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) { + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } } /** diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 5918cafb880..1974df7a9f7 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -230,7 +230,6 @@ static void dump_sdma_state(struct ipath_devdata *dd) static void sdma_abort_task(unsigned long opaque) { struct ipath_devdata *dd = (struct ipath_devdata *) opaque; - int kick = 0; u64 status; unsigned long flags; @@ -308,30 +307,26 @@ static void sdma_abort_task(unsigned long opaque) /* done with sdma state for a bit */ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - /* restart sdma engine */ + /* + * Don't restart sdma here. Wait until link is up to ACTIVE. + * VL15 MADs used to bring the link up use PIO, and multiple + * link transitions otherwise cause the sdma engine to be + * stopped and started multiple times. + * The disable is done here, including the shadow, so the + * state is kept consistent. + * See ipath_restart_sdma() for the actual starting of sdma. + */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - kick = 1; - ipath_dbg("sdma restarted from abort\n"); - - /* now clear status bits */ - spin_lock_irqsave(&dd->ipath_sdma_lock, flags); - __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); - __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); /* make sure I see next message */ dd->ipath_sdma_abort_jiffies = 0; - goto unlock; + goto done; } resched: @@ -353,10 +348,8 @@ resched_noprint: unlock: spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); - - /* kick upper layers */ - if (kick) - ipath_ib_piobufavail(dd->verbs_dev); +done: + return; } /* @@ -481,10 +474,14 @@ int setup_sdma(struct ipath_devdata *dd) tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task, (unsigned long) dd); - /* Turn on SDMA */ + /* + * No use to turn on SDMA here, as link is probably not ACTIVE + * Just mark it RUNNING and enable the interrupt, and let the + * ipath_restart_sdma() on link transition to ACTIVE actually + * enable it. + */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE | - INFINIPATH_S_SDMAINTENABLE; + dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE; ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status); @@ -572,6 +569,56 @@ void teardown_sdma(struct ipath_devdata *dd) sdma_descq, sdma_descq_phys); } +/* + * [Re]start SDMA, if we use it, and it's not already OK. + * This is called on transition to link ACTIVE, either the first or + * subsequent times. + */ +void ipath_restart_sdma(struct ipath_devdata *dd) +{ + unsigned long flags; + int needed = 1; + + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) + goto bail; + + /* + * First, make sure we should, which is to say, + * check that we are "RUNNING" (not in teardown) + * and not "SHUTDOWN" + */ + spin_lock_irqsave(&dd->ipath_sdma_lock, flags); + if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status) + || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) + needed = 0; + else { + __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status); + __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); + } + spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); + if (!needed) { + ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n", + dd->ipath_sdma_status); + goto bail; + } + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + /* + * First clear, just to be safe. Enable is only done + * in chip on 0->1 transition + */ + dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + +bail: + return; +} + static inline void make_sdma_desc(struct ipath_devdata *dd, u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset) { diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index adff2f10dc0..1e36bac8149 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -292,8 +292,8 @@ void ipath_get_faststats(unsigned long opaque) && time_after(jiffies, dd->ipath_unmasktime)) { char ebuf[256]; int iserr; - iserr = ipath_decode_err(ebuf, sizeof ebuf, - dd->ipath_maskederrs); + iserr = ipath_decode_err(dd, ebuf, sizeof ebuf, + dd->ipath_maskederrs); if (dd->ipath_maskederrs & ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index de67eed08ed..4d4d58d8e9d 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -303,6 +303,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_hdrwords = 7; qp->s_cur_size = wqe->length; qp->s_cur_sge = &qp->s_sge; + qp->s_dmult = ah_attr->static_rate; qp->s_wqe = wqe; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 2e6b6f6265b..75429aa92ce 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -242,6 +242,93 @@ static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); } +/* + * Count the number of DMA descriptors needed to send length bytes of data. + * Don't modify the ipath_sge_state to get the count. + * Return zero if any of the segments is not aligned. + */ +static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length) +{ + struct ipath_sge *sg_list = ss->sg_list; + struct ipath_sge sge = ss->sge; + u8 num_sge = ss->num_sge; + u32 ndesc = 1; /* count the header */ + + while (length) { + u32 len = sge.length; + + if (len > length) + len = length; + if (len > sge.sge_length) + len = sge.sge_length; + BUG_ON(len == 0); + if (((long) sge.vaddr & (sizeof(u32) - 1)) || + (len != length && (len & (sizeof(u32) - 1)))) { + ndesc = 0; + break; + } + ndesc++; + sge.vaddr += len; + sge.length -= len; + sge.sge_length -= len; + if (sge.sge_length == 0) { + if (--num_sge) + sge = *sg_list++; + } else if (sge.length == 0 && sge.mr != NULL) { + if (++sge.n >= IPATH_SEGSZ) { + if (++sge.m >= sge.mr->mapsz) + break; + sge.n = 0; + } + sge.vaddr = + sge.mr->map[sge.m]->segs[sge.n].vaddr; + sge.length = + sge.mr->map[sge.m]->segs[sge.n].length; + } + length -= len; + } + return ndesc; +} + +/* + * Copy from the SGEs to the data buffer. + */ +static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss, + u32 length) +{ + struct ipath_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(data, sge->vaddr, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr != NULL) { + if (++sge->n >= IPATH_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + /** * ipath_post_one_send - post one RC, UC, or UD send work request * @qp: the QP to post on @@ -866,13 +953,231 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, __raw_writel(last, piobuf); } -static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords, +/* + * Convert IB rate to delay multiplier. + */ +unsigned ipath_ib_rate_to_mult(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 8; + case IB_RATE_5_GBPS: return 4; + case IB_RATE_10_GBPS: return 2; + case IB_RATE_20_GBPS: return 1; + default: return 0; + } +} + +/* + * Convert delay multiplier to IB rate + */ +static enum ib_rate ipath_mult_to_ib_rate(unsigned mult) +{ + switch (mult) { + case 8: return IB_RATE_2_5_GBPS; + case 4: return IB_RATE_5_GBPS; + case 2: return IB_RATE_10_GBPS; + case 1: return IB_RATE_20_GBPS; + default: return IB_RATE_PORT_CURRENT; + } +} + +static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev) +{ + struct ipath_verbs_txreq *tx = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + if (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + tx = list_entry(l, struct ipath_verbs_txreq, txreq.list); + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + return tx; +} + +static inline void put_txreq(struct ipath_ibdev *dev, + struct ipath_verbs_txreq *tx) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + list_add(&tx->txreq.list, &dev->txreq_free); + spin_unlock_irqrestore(&dev->pending_lock, flags); +} + +static void sdma_complete(void *cookie, int status) +{ + struct ipath_verbs_txreq *tx = cookie; + struct ipath_qp *qp = tx->qp; + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + + /* Generate a completion queue entry if needed */ + if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; + + ipath_send_complete(qp, tx->wqe, ibs); + } + + if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) + kfree(tx->txreq.map_addr); + put_txreq(dev, tx); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +/* + * Compute the number of clock cycles of delay before sending the next packet. + * The multipliers reflect the number of clocks for the fastest rate so + * one tick at 4xDDR is 8 ticks at 1xSDR. + * If the destination port will take longer to receive a packet than + * the outgoing link can send it, we need to delay sending the next packet + * by the difference in time it takes the receiver to receive and the sender + * to send this packet. + * Note that this delay is always correct for UC and RC but not always + * optimal for UD. For UD, the destination HCA can be different for each + * packet, in which case, we could send packets to a different destination + * while "waiting" for the delay. The overhead for doing this without + * HW support is more than just paying the cost of delaying some packets + * unnecessarily. + */ +static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult) +{ + return (rcv_mult > snd_mult) ? + (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; +} + +static int ipath_verbs_send_dma(struct ipath_qp *qp, + struct ipath_ib_header *hdr, u32 hdrwords, + struct ipath_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_devdata *dd = dev->dd; + struct ipath_verbs_txreq *tx; + u32 *piobuf; + u32 control; + u32 ndesc; + int ret; + + tx = qp->s_tx; + if (tx) { + qp->s_tx = NULL; + /* resend previously constructed packet */ + ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); + if (ret) + qp->s_tx = tx; + goto bail; + } + + tx = get_txreq(dev); + if (!tx) { + ret = -EBUSY; + goto bail; + } + + /* + * Get the saved delay count we computed for the previous packet + * and save the delay count for this packet to be used next time + * we get here. + */ + control = qp->s_pkt_delay; + qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); + + tx->qp = qp; + atomic_inc(&qp->refcount); + tx->wqe = qp->s_wqe; + tx->txreq.callback = sdma_complete; + tx->txreq.callback_cookie = tx; + tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST | + IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC; + if (plen + 1 >= IPATH_SMALLBUF_DWORDS) + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF; + + /* VL15 packets bypass credit check */ + if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) { + control |= 1ULL << 31; + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15; + } + + if (len) { + /* + * Don't try to DMA if it takes more descriptors than + * the queue holds. + */ + ndesc = ipath_count_sge(ss, len); + if (ndesc >= dd->ipath_sdma_descq_cnt) + ndesc = 0; + } else + ndesc = 1; + if (ndesc) { + tx->hdr.pbc[0] = cpu_to_le32(plen); + tx->hdr.pbc[1] = cpu_to_le32(control); + memcpy(&tx->hdr.hdr, hdr, hdrwords << 2); + tx->txreq.sg_count = ndesc; + tx->map_len = (hdrwords + 2) << 2; + tx->txreq.map_addr = &tx->hdr; + ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); + if (ret) { + /* save ss and length in dwords */ + tx->ss = ss; + tx->len = dwords; + qp->s_tx = tx; + } + goto bail; + } + + /* Allocate a buffer and copy the header and payload to it. */ + tx->map_len = (plen + 1) << 2; + piobuf = kmalloc(tx->map_len, GFP_ATOMIC); + if (unlikely(piobuf == NULL)) { + ret = -EBUSY; + goto err_tx; + } + tx->txreq.map_addr = piobuf; + tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF; + tx->txreq.sg_count = 1; + + *piobuf++ = (__force u32) cpu_to_le32(plen); + *piobuf++ = (__force u32) cpu_to_le32(control); + memcpy(piobuf, hdr, hdrwords << 2); + ipath_copy_from_sge(piobuf + hdrwords, ss, len); + + ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); + /* + * If we couldn't queue the DMA request, save the info + * and try again later rather than destroying the + * buffer and undoing the side effects of the copy. + */ + if (ret) { + tx->ss = NULL; + tx->len = 0; + qp->s_tx = tx; + } + dev->n_unaligned++; + goto bail; + +err_tx: + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + put_txreq(dev, tx); +bail: + return ret; +} + +static int ipath_verbs_send_pio(struct ipath_qp *qp, + struct ipath_ib_header *ibhdr, u32 hdrwords, struct ipath_sge_state *ss, u32 len, u32 plen, u32 dwords) { struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; + u32 *hdr = (u32 *) ibhdr; u32 __iomem *piobuf; unsigned flush_wc; + u32 control; int ret; piobuf = ipath_getpiobuf(dd, plen, NULL); @@ -882,11 +1187,23 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords, } /* - * Write len to control qword, no flags. + * Get the saved delay count we computed for the previous packet + * and save the delay count for this packet to be used next time + * we get here. + */ + control = qp->s_pkt_delay; + qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); + + /* VL15 packets bypass credit check */ + if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15) + control |= 1ULL << 31; + + /* + * Write the length to the control qword plus any needed flags. * We have to flush after the PBC for correctness on some cpus * or WC buffer can be written out of order. */ - writeq(plen, piobuf); + writeq(((u64) control << 32) | plen, piobuf); piobuf += 2; flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC; @@ -961,15 +1278,25 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, */ plen = hdrwords + dwords + 1; - /* Drop non-VL15 packets if we are not in the active state */ - if (!(dd->ipath_flags & IPATH_LINKACTIVE) && - qp->ibqp.qp_type != IB_QPT_SMI) { + /* + * VL15 packets (IB_QPT_SMI) will always use PIO, so we + * can defer SDMA restart until link goes ACTIVE without + * worrying about just how we got there. + */ + if (qp->ibqp.qp_type == IB_QPT_SMI) + ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + plen, dwords); + /* All non-VL15 packets are dropped if link is not ACTIVE */ + else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { if (qp->s_wqe) ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); ret = 0; - } else - ret = ipath_verbs_send_pio(qp, (u32 *) hdr, hdrwords, - ss, len, plen, dwords); + } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, + plen, dwords); + else + ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + plen, dwords); return ret; } @@ -1038,6 +1365,12 @@ int ipath_get_counters(struct ipath_devdata *dd, ipath_snap_cntr(dd, crp->cr_errlpcrccnt) + ipath_snap_cntr(dd, crp->cr_badformatcnt) + dd->ipath_rxfc_unsupvl_errs; + if (crp->cr_rxotherlocalphyerrcnt) + cntrs->port_rcv_errors += + ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt); + if (crp->cr_rxvlerrcnt) + cntrs->port_rcv_errors += + ipath_snap_cntr(dd, crp->cr_rxvlerrcnt); cntrs->port_rcv_remphys_errors = ipath_snap_cntr(dd, crp->cr_rcvebpcnt); cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt); @@ -1046,9 +1379,16 @@ int ipath_get_counters(struct ipath_devdata *dd, cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt); cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt); cntrs->local_link_integrity_errors = - (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? - dd->ipath_lli_errs : dd->ipath_lli_errors; - cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs; + crp->cr_locallinkintegrityerrcnt ? + ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) : + ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? + dd->ipath_lli_errs : dd->ipath_lli_errors); + cntrs->excessive_buffer_overrun_errors = + crp->cr_excessbufferovflcnt ? + ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) : + dd->ipath_overrun_thresh_errs; + cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ? + ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0; ret = 0; @@ -1396,6 +1736,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd, /* ib_create_ah() will initialize ah->ibah. */ ah->attr = *ah_attr; + ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate); ret = &ah->ibah; @@ -1429,6 +1770,7 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) struct ipath_ah *ah = to_iah(ibah); *ah_attr = ah->attr; + ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate); return 0; } @@ -1578,6 +1920,8 @@ int ipath_register_ib_device(struct ipath_devdata *dd) struct ipath_verbs_counters cntrs; struct ipath_ibdev *idev; struct ib_device *dev; + struct ipath_verbs_txreq *tx; + unsigned i; int ret; idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); @@ -1588,6 +1932,17 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev = &idev->ibdev; + if (dd->ipath_sdma_descq_cnt) { + tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx, + GFP_KERNEL); + if (tx == NULL) { + ret = -ENOMEM; + goto err_tx; + } + } else + tx = NULL; + idev->txreq_bufs = tx; + /* Only need to initialize non-zero fields. */ spin_lock_init(&idev->n_pds_lock); spin_lock_init(&idev->n_ahs_lock); @@ -1628,6 +1983,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) INIT_LIST_HEAD(&idev->pending[2]); INIT_LIST_HEAD(&idev->piowait); INIT_LIST_HEAD(&idev->rnrwait); + INIT_LIST_HEAD(&idev->txreq_free); idev->pending_index = 0; idev->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP; @@ -1659,6 +2015,9 @@ int ipath_register_ib_device(struct ipath_devdata *dd) cntrs.excessive_buffer_overrun_errors; idev->z_vl15_dropped = cntrs.vl15_dropped; + for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++) + list_add(&tx->txreq.list, &idev->txreq_free); + /* * The system image GUID is supposed to be the same for all * IB HCAs in a single system but since there can be other @@ -1708,6 +2067,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->phys_port_cnt = 1; dev->num_comp_vectors = 1; dev->dma_device = &dd->pcidev->dev; + dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; dev->query_port = ipath_query_port; @@ -1772,6 +2132,8 @@ err_reg: err_lk: kfree(idev->qp_table.table); err_qp: + kfree(idev->txreq_bufs); +err_tx: ib_dealloc_device(dev); ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret); idev = NULL; @@ -1806,6 +2168,7 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) ipath_free_all_qps(&dev->qp_table); kfree(dev->qp_table.table); kfree(dev->lk_table.table); + kfree(dev->txreq_bufs); ib_dealloc_device(ibdev); } @@ -1853,13 +2216,15 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) "RC stalls %d\n" "piobuf wait %d\n" "no piobuf %d\n" + "unaligned %d\n" "PKT drops %d\n" "WQE errs %d\n", dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, - dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); + dev->n_no_piobuf, dev->n_unaligned, + dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; |