summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ipath
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h23
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c123
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c287
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c152
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c73
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c86
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c100
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h10
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c14
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c12
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c133
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c920
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h22
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c63
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c16
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c15
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h57
23 files changed, 1349 insertions, 825 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 54139d39818..10c008f22ba 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -78,6 +78,8 @@
#define IPATH_IB_LINKINIT 3
#define IPATH_IB_LINKDOWN_SLEEP 4
#define IPATH_IB_LINKDOWN_DISABLE 5
+#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
+#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
/*
* stats maintained by the driver. For now, at least, this is global
@@ -316,11 +318,17 @@ struct ipath_base_info {
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
- /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
+ /* shared memory pages for subports if port is shared */
__u64 spi_subport_uregbase;
__u64 spi_subport_rcvegrbuf;
__u64 spi_subport_rcvhdr_base;
+ /* shared memory page for hardware port if it is shared */
+ __u64 spi_port_uregbase;
+ __u64 spi_port_rcvegrbuf;
+ __u64 spi_port_rcvhdr_base;
+ __u64 spi_port_rcvhdr_tailaddr;
+
} __attribute__ ((aligned(8)));
@@ -344,7 +352,7 @@ struct ipath_base_info {
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define IPATH_USER_SWMINOR 3
+#define IPATH_USER_SWMINOR 5
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
@@ -418,11 +426,14 @@ struct ipath_user_info {
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
-#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */
+#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
#define IPATH_CMD_USER_INIT 24 /* set up userspace */
+#define IPATH_CMD_UNUSED_1 25
+#define IPATH_CMD_UNUSED_2 26
+#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
-#define IPATH_CMD_MAX 24
+#define IPATH_CMD_MAX 27
struct ipath_port_info {
__u32 num_active; /* number of active units */
@@ -430,7 +441,7 @@ struct ipath_port_info {
__u16 port; /* port on unit assigned to caller */
__u16 subport; /* subport on unit assigned to caller */
__u16 num_ports; /* number of ports available on unit */
- __u16 num_subports; /* number of subport slaves opened on port */
+ __u16 num_subports; /* number of subports opened on port */
};
struct ipath_tid_info {
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 87462e0cb4d..ea78e6dddc9 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
}
return;
}
- wc->queue[head] = *entry;
+ wc->queue[head].wr_id = entry->wr_id;
+ wc->queue[head].status = entry->status;
+ wc->queue[head].opcode = entry->opcode;
+ wc->queue[head].vendor_err = entry->vendor_err;
+ wc->queue[head].byte_len = entry->byte_len;
+ wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
+ wc->queue[head].qp_num = entry->qp->qp_num;
+ wc->queue[head].src_qp = entry->src_qp;
+ wc->queue[head].wc_flags = entry->wc_flags;
+ wc->queue[head].pkey_index = entry->pkey_index;
+ wc->queue[head].slid = entry->slid;
+ wc->queue[head].sl = entry->sl;
+ wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
+ wc->queue[head].port_num = entry->port_num;
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+ struct ipath_qp *qp;
+
if (tail == wc->head)
break;
- *entry = wc->queue[tail];
+
+ qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
+ wc->queue[tail].qp_num);
+ entry->qp = &qp->ibqp;
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+
+ entry->wr_id = wc->queue[tail].wr_id;
+ entry->status = wc->queue[tail].status;
+ entry->opcode = wc->queue[tail].opcode;
+ entry->vendor_err = wc->queue[tail].vendor_err;
+ entry->byte_len = wc->queue[tail].byte_len;
+ entry->imm_data = wc->queue[tail].imm_data;
+ entry->src_qp = wc->queue[tail].src_qp;
+ entry->wc_flags = wc->queue[tail].wc_flags;
+ entry->pkey_index = wc->queue[tail].pkey_index;
+ entry->slid = wc->queue[tail].slid;
+ entry->sl = wc->queue[tail].sl;
+ entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
+ entry->port_num = wc->queue[tail].port_num;
if (tail >= cq->ibcq.cqe)
tail = 0;
else
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8..42bfbdb0d3e 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
#define __IPATH_PROCDBG 0x100
/* print mmap/nopage stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x200
+#define __IPATH_ERRPKTDBG 0x400
#define __IPATH_USER_SEND 0x1000 /* use user mode send */
#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 0f13a2182cc..63e8368b0e9 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
}
fp->private_data = dd;
- ipath_diag_inuse = 1;
+ ipath_diag_inuse = -2;
diag_set_link = 0;
ret = 0;
@@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
+ else if (ipath_diag_inuse < 1 && (*off || count != 8))
+ ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit reads */
ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
@@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
if (ret >= 0) {
*off += count;
ret = count;
+ if (ipath_diag_inuse == -2)
+ ipath_diag_inuse++;
}
return ret;
@@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
+ else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
+ ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
+ ret = -EINVAL; /* before any other write allowed */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit writes */
ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
@@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
if (ret >= 0) {
*off += count;
ret = count;
+ if (ipath_diag_inuse == -1)
+ ipath_diag_inuse = 1; /* all read/write OK now */
}
return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ae7f21a0cdc..e3a22320971 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
/* setup the chip-specific functions, as early as possible. */
switch (ent->device) {
-#ifdef CONFIG_HT_IRQ
case PCI_DEVICE_ID_INFINIPATH_HT:
+#ifdef CONFIG_HT_IRQ
ipath_init_iba6110_funcs(dd);
break;
+#else
+ ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
+ "CONFIG_HT_IRQ is not enabled\n", ent->device);
+ return -ENODEV;
#endif
-#ifdef CONFIG_PCI_MSI
case PCI_DEVICE_ID_INFINIPATH_PE800:
+#ifdef CONFIG_PCI_MSI
ipath_init_iba6120_funcs(dd);
break;
+#else
+ ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
+ "CONFIG_PCI_MSI is not enabled\n", ent->device);
+ return -ENODEV;
#endif
default:
ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
if (ret)
- goto bail_iounmap;
+ goto bail_irqsetup;
ret = ipath_enable_wc(dd);
@@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail;
+bail_irqsetup:
+ if (pdev->irq) free_irq(pdev->irq, dd);
+
bail_iounmap:
iounmap((volatile void __iomem *) dd->ipath_kregbase);
@@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
{
int port;
- ipath_shutdown_device(dd);
-
if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
/* can't do anything more with chip; needs re-init */
*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
@@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
dd->ipath_pageshadow);
- vfree(dd->ipath_pageshadow);
+ tmpp = dd->ipath_pageshadow;
dd->ipath_pageshadow = NULL;
+ vfree(tmpp);
}
/*
@@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
+ /*
+ * disable the IB link early, to be sure no new packets arrive, which
+ * complicates the shutdown process
+ */
+ ipath_shutdown_device(dd);
+
if (dd->verbs_dev)
ipath_unregister_ib_device(dd->verbs_dev);
@@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
}
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print * it or not depending on "normal packet errors" vs everything
+ * else. Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
{
+ int iserr = 1;
*buf = '\0';
+ if (err & INFINIPATH_E_PKTERRS) {
+ if (!(err & ~INFINIPATH_E_PKTERRS))
+ iserr = 0; // if only packet errors.
+ if (ipath_debug & __IPATH_ERRPKTDBG) {
+ if (err & INFINIPATH_E_REBP)
+ strlcat(buf, "EBP ", blen);
+ if (err & INFINIPATH_E_RVCRC)
+ strlcat(buf, "VCRC ", blen);
+ if (err & INFINIPATH_E_RICRC) {
+ strlcat(buf, "CRC ", blen);
+ // clear for check below, so only once
+ err &= INFINIPATH_E_RICRC;
+ }
+ if (err & INFINIPATH_E_RSHORTPKTLEN)
+ strlcat(buf, "rshortpktlen ", blen);
+ if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+ strlcat(buf, "sdroppeddatapkt ", blen);
+ if (err & INFINIPATH_E_SPKTLEN)
+ strlcat(buf, "spktlen ", blen);
+ }
+ if ((err & INFINIPATH_E_RICRC) &&
+ !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
+ strlcat(buf, "CRC ", blen);
+ if (!iserr)
+ goto done;
+ }
if (err & INFINIPATH_E_RHDRLEN)
strlcat(buf, "rhdrlen ", blen);
if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "rhdr ", blen);
if (err & INFINIPATH_E_RLONGPKTLEN)
strlcat(buf, "rlongpktlen ", blen);
- if (err & INFINIPATH_E_RSHORTPKTLEN)
- strlcat(buf, "rshortpktlen ", blen);
if (err & INFINIPATH_E_RMAXPKTLEN)
strlcat(buf, "rmaxpktlen ", blen);
if (err & INFINIPATH_E_RMINPKTLEN)
strlcat(buf, "rminpktlen ", blen);
+ if (err & INFINIPATH_E_SMINPKTLEN)
+ strlcat(buf, "sminpktlen ", blen);
if (err & INFINIPATH_E_RFORMATERR)
strlcat(buf, "rformaterr ", blen);
if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "runexpchar ", blen);
if (err & INFINIPATH_E_RIBFLOW)
strlcat(buf, "ribflow ", blen);
- if (err & INFINIPATH_E_REBP)
- strlcat(buf, "EBP ", blen);
if (err & INFINIPATH_E_SUNDERRUN)
strlcat(buf, "sunderrun ", blen);
if (err & INFINIPATH_E_SPIOARMLAUNCH)
strlcat(buf, "spioarmlaunch ", blen);
if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
strlcat(buf, "sunexperrpktnum ", blen);
- if (err & INFINIPATH_E_SDROPPEDDATAPKT)
- strlcat(buf, "sdroppeddatapkt ", blen);
if (err & INFINIPATH_E_SDROPPEDSMPPKT)
strlcat(buf, "sdroppedsmppkt ", blen);
if (err & INFINIPATH_E_SMAXPKTLEN)
strlcat(buf, "smaxpktlen ", blen);
- if (err & INFINIPATH_E_SMINPKTLEN)
- strlcat(buf, "sminpktlen ", blen);
if (err & INFINIPATH_E_SUNSUPVL)
strlcat(buf, "sunsupVL ", blen);
- if (err & INFINIPATH_E_SPKTLEN)
- strlcat(buf, "spktlen ", blen);
if (err & INFINIPATH_E_INVALIDADDR)
strlcat(buf, "invalidaddr ", blen);
- if (err & INFINIPATH_E_RICRC)
- strlcat(buf, "CRC ", blen);
- if (err & INFINIPATH_E_RVCRC)
- strlcat(buf, "VCRC ", blen);
if (err & INFINIPATH_E_RRCVEGRFULL)
strlcat(buf, "rcvegrfull ", blen);
if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "hardware ", blen);
if (err & INFINIPATH_E_RESET)
strlcat(buf, "reset ", blen);
+done:
+ return iserr;
}
/**
@@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
lstate = IPATH_LINKACTIVE;
break;
+ case IPATH_IB_LINK_LOOPBACK:
+ dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
+ dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ ret = 0;
+ goto bail; // no state change to wait for
+
+ case IPATH_IB_LINK_EXTERNAL:
+ dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
+ dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ ret = 0;
+ goto bail; // no state change to wait for
+
default:
ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
ret = -EINVAL;
@@ -1765,29 +1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
return 0;
}
-/**
- * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to read
- * @port: the port containing the register
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
- unsigned port)
-{
- u16 where;
-
- if (port < dd->ipath_portcnt &&
- (regno == dd->ipath_kregs->kr_rcvhdraddr ||
- regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
- where = regno + port;
- else
- where = -1;
-
- return ipath_read_kreg64(dd, where);
-}
/**
* ipath_write_kreg_port - write a device's per-port 64-bit kernel register
@@ -1973,7 +2005,8 @@ static int __init infinipath_init(void)
{
int ret;
- ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
+ if (ipath_debug & __IPATH_DBG)
+ printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
/*
* These must be called before the driver is registered with
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a4019a6b756..030185f90ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
} else
memcpy(dd->ipath_serial, ifp->if_serial,
sizeof ifp->if_serial);
+ if (!strstr(ifp->if_comment, "Tested successfully"))
+ ipath_dev_err(dd, "Board SN %s did not pass functional "
+ "test: %s\n", dd->ipath_serial,
+ ifp->if_comment);
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5d64ff87529..1272aaf2a78 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -41,12 +41,6 @@
#include "ipath_kernel.h"
#include "ipath_common.h"
-/*
- * mmap64 doesn't allow all 64 bits for 32-bit applications
- * so only use the low 43 bits.
- */
-#define MMAP64_MASK 0x7FFFFFFFFFFUL
-
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = {
.mmap = ipath_mmap
};
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+ struct page *page;
+ u64 paddr = 0;
+
+ page = vmalloc_to_page(p);
+ if (page)
+ paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+ return paddr;
+}
+
static int ipath_get_base_info(struct file *fp,
void __user *ubase, size_t ubase_size)
{
@@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp,
sz = sizeof(*kinfo);
/* If port sharing is not requested, allow the old size structure */
if (!shared)
- sz -= 3 * sizeof(u64);
+ sz -= 7 * sizeof(u64);
if (ubase_size < sz) {
ipath_cdbg(PROC,
"Base size %zu, need %zu (version mismatch?)\n",
@@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp,
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign *
(dd->ipath_pbufsport - kinfo->spi_piocnt);
- kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
- dd->ipath_palign * pd->port_port;
} else {
unsigned slave = subport_fp(fp) - 1;
kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign * kinfo->spi_piocnt * slave;
- kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
- PAGE_SIZE * slave) & MMAP64_MASK;
+ }
+ if (shared) {
+ kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
+ kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
+ kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
- kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
- pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
- kinfo->spi_rcvhdr_tailaddr =
- (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
- kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
- dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
- MMAP64_MASK;
+ kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport_fp(fp));
+
+ kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport_fp(fp));
+ kinfo->spi_rcvhdr_tailaddr = 0;
+ kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
+ pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
+ subport_fp(fp));
+
+ kinfo->spi_subport_uregbase =
+ cvt_kvaddr(pd->subport_uregbase);
+ kinfo->spi_subport_rcvegrbuf =
+ cvt_kvaddr(pd->subport_rcvegrbuf);
+ kinfo->spi_subport_rcvhdr_base =
+ cvt_kvaddr(pd->subport_rcvhdr_base);
+ ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+ kinfo->spi_port, kinfo->spi_runtime_flags,
+ (unsigned long long) kinfo->spi_subport_uregbase,
+ (unsigned long long) kinfo->spi_subport_rcvegrbuf,
+ (unsigned long long) kinfo->spi_subport_rcvhdr_base);
}
kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
@@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp,
if (master) {
kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
- kinfo->spi_subport_uregbase =
- (u64) pd->subport_uregbase & MMAP64_MASK;
- kinfo->spi_subport_rcvegrbuf =
- (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
- kinfo->spi_subport_rcvhdr_base =
- (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
- ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
- kinfo->spi_port, kinfo->spi_runtime_flags,
- (unsigned long long) kinfo->spi_subport_uregbase,
- (unsigned long long) kinfo->spi_subport_rcvegrbuf,
- (unsigned long long) kinfo->spi_subport_rcvhdr_base);
}
- if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
+ sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
+ if (copy_to_user(ubase, kinfo, sz))
ret = -EFAULT;
bail:
@@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
struct ipath_devdata *dd;
void *addr;
size_t size;
- int ret;
+ int ret = 0;
/* If the port is not shared, all addresses should be physical */
- if (!pd->port_subport_cnt) {
- ret = -EINVAL;
+ if (!pd->port_subport_cnt)
goto bail;
- }
dd = pd->port_dd;
size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
/*
- * Master has all the slave uregbase, rcvhdrq, and
- * rcvegrbufs mmapped.
+ * Each process has all the subport uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped - as an array for all the processes,
+ * and also separately for this process.
*/
- if (subport == 0) {
- unsigned num_slaves = pd->port_subport_cnt - 1;
-
- if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
- addr = pd->subport_uregbase;
- size = PAGE_SIZE * num_slaves;
- } else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
- MMAP64_MASK)) {
- addr = pd->subport_rcvhdr_base;
- size = pd->port_rcvhdrq_size * num_slaves;
- } else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
- MMAP64_MASK)) {
- addr = pd->subport_rcvegrbuf;
- size *= num_slaves;
- } else {
- ret = -EINVAL;
- goto bail;
- }
- } else if (pgaddr == (((u64) pd->subport_uregbase +
- PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
- addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
- size = PAGE_SIZE;
- } else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
- pd->port_rcvhdrq_size * (subport - 1)) &
- MMAP64_MASK)) {
- addr = pd->subport_rcvhdr_base +
- pd->port_rcvhdrq_size * (subport - 1);
- size = pd->port_rcvhdrq_size;
- } else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
- size * (subport - 1)) & MMAP64_MASK)) {
- addr = pd->subport_rcvegrbuf + size * (subport - 1);
- /* rcvegrbufs are read-only on the slave */
- if (vma->vm_flags & VM_WRITE) {
- dev_info(&dd->pcidev->dev,
- "Can't map eager buffers as "
- "writable (flags=%lx)\n", vma->vm_flags);
- ret = -EPERM;
- goto bail;
- }
- /*
- * Don't allow permission to later change to writeable
- * with mprotect.
- */
- vma->vm_flags &= ~VM_MAYWRITE;
+ if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
+ addr = pd->subport_uregbase;
+ size = PAGE_SIZE * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
+ addr = pd->subport_rcvhdr_base;
+ size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
+ addr = pd->subport_rcvegrbuf;
+ size *= pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport)) {
+ addr = pd->subport_uregbase + PAGE_SIZE * subport;
+ size = PAGE_SIZE;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport)) {
+ addr = pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport;
+ size = pd->port_rcvhdrq_size;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
+ size * subport)) {
+ addr = pd->subport_rcvegrbuf + size * subport;
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
} else {
- ret = -EINVAL;
goto bail;
}
len = vma->vm_end - vma->vm_start;
@@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
vma->vm_ops = &ipath_file_vm_ops;
vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
- ret = 0;
+ ret = 1;
bail:
return ret;
@@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
* Check for kernel virtual addresses first, anything else must
* match a HW or memory address.
*/
- if (pgaddr >= (1ULL<<40)) {
- ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
goto bail;
}
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
if (!pd->port_subport_cnt) {
/* port is not shared */
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
piocnt = dd->ipath_pbufsport;
piobufs = pd->port_piobufs;
} else if (!subport_fp(fp)) {
/* caller is the master */
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
(dd->ipath_pbufsport % pd->port_subport_cnt);
piobufs = pd->port_piobufs +
@@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
unsigned slave = subport_fp(fp) - 1;
/* caller is a slave */
- ureg = 0;
piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
}
@@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
(void *) dd->ipath_pioavailregs_dma,
"pioavail registers");
- else if (subport_fp(fp))
- /* Subports don't mmap the physical receive buffers */
- ret = -EINVAL;
else if (pgaddr == pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd);
else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
@@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd,
const struct ipath_user_info *uinfo)
{
int ret = 0;
- unsigned num_slaves;
+ unsigned num_subports;
size_t size;
- /* Old user binaries don't know about subports */
- if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
- goto bail;
/*
* If the user is requesting zero or one port,
* skip the subport allocation.
*/
if (uinfo->spu_subport_cnt <= 1)
goto bail;
- if (uinfo->spu_subport_cnt > 4) {
+
+ /* Old user binaries don't know about new subport implementation */
+ if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
+ dev_info(&dd->pcidev->dev,
+ "Mismatched user minor version (%d) and driver "
+ "minor version (%d) while port sharing. Ensure "
+ "that driver and library are from the same "
+ "release.\n",
+ (int) (uinfo->spu_userversion & 0xffff),
+ IPATH_USER_SWMINOR);
+ goto bail;
+ }
+ if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
ret = -EINVAL;
goto bail;
}
- num_slaves = uinfo->spu_subport_cnt - 1;
- pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
+ num_subports = uinfo->spu_subport_cnt;
+ pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
if (!pd->subport_uregbase) {
ret = -ENOMEM;
goto bail;
}
/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
- sizeof(u32), PAGE_SIZE) * num_slaves;
+ sizeof(u32), PAGE_SIZE) * num_subports;
pd->subport_rcvhdr_base = vmalloc(size);
if (!pd->subport_rcvhdr_base) {
ret = -ENOMEM;
@@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd,
pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
pd->port_rcvegrbuf_size *
- num_slaves);
+ num_subports);
if (!pd->subport_rcvegrbuf) {
ret = -ENOMEM;
goto bail_rhdr;
@@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd,
pd->port_subport_cnt = uinfo->spu_subport_cnt;
pd->port_subport_id = uinfo->spu_subport_id;
pd->active_slaves = 1;
+ set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
+ memset(pd->subport_rcvhdr_base, 0, size);
+ memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
+ pd->port_rcvegrbuf_size *
+ num_subports);
goto bail;
bail_rhdr:
@@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp,
*/
if (!cpus_empty(current->cpus_allowed) &&
!cpus_full(current->cpus_allowed)) {
- int ncpus = num_online_cpus(), curcpu = -1;
+ int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
for (i = 0; i < ncpus; i++)
if (cpu_isset(i, current->cpus_allowed)) {
ipath_cdbg(PROC, "%s[%u] affinity set for "
- "cpu %d\n", current->comm,
- current->pid, i);
+ "cpu %d/%d\n", current->comm,
+ current->pid, i, ncpus);
curcpu = i;
+ nset++;
}
- if (curcpu != -1) {
+ if (curcpu != -1 && nset != ncpus) {
if (npresent) {
prefunit = curcpu / (ncpus / npresent);
- ipath_dbg("%s[%u] %d chips, %d cpus, "
+ ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
"%d cpus/chip, select unit %d\n",
current->comm, current->pid,
npresent, ncpus, ncpus / npresent,
@@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp,
const struct ipath_user_info *uinfo)
{
int ret;
- struct ipath_portdata *pd;
+ struct ipath_portdata *pd = port_fp(fp);
struct ipath_devdata *dd;
u32 head32;
- pd = port_fp(fp);
+ /* Subports don't need to initialize anything since master did it. */
+ if (subport_fp(fp)) {
+ ret = wait_event_interruptible(pd->port_wait,
+ !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
+ goto done;
+ }
+
dd = pd->port_dd;
if (uinfo->spu_rcvhdrsize) {
@@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp,
dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
+ /* Notify any waiting slaves */
+ if (pd->port_subport_cnt) {
+ clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ wake_up(&pd->port_wait);
+ }
done:
return ret;
}
@@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
return ret;
}
+static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
+{
+ u64 reg = dd->ipath_sendctrl;
+
+ clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+
+ return 0;
+}
+
static ssize_t ipath_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
@@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
dest = &cmd.cmd.part_key;
src = &ucmd->cmd.part_key;
break;
- case IPATH_CMD_SLAVE_INFO:
+ case __IPATH_CMD_SLAVE_INFO:
copy = sizeof(cmd.cmd.slave_mask_addr);
dest = &cmd.cmd.slave_mask_addr;
src = &ucmd->cmd.slave_mask_addr;
break;
+ case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg
+ copy = 0;
+ src = NULL;
+ dest = NULL;
+ break;
default:
ret = -EINVAL;
goto bail;
}
- if ((count - consumed) < copy) {
- ret = -EINVAL;
- goto bail;
- }
+ if (copy) {
+ if ((count - consumed) < copy) {
+ ret = -EINVAL;
+ goto bail;
+ }
- if (copy_from_user(dest, src, copy)) {
- ret = -EFAULT;
- goto bail;
+ if (copy_from_user(dest, src, copy)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ consumed += copy;
}
- consumed += copy;
pd = port_fp(fp);
if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
cmd.type != IPATH_CMD_ASSIGN_PORT) {
@@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
case IPATH_CMD_SET_PART_KEY:
ret = ipath_set_part_key(pd, cmd.cmd.part_key);
break;
- case IPATH_CMD_SLAVE_INFO:
+ case __IPATH_CMD_SLAVE_INFO:
ret = ipath_get_slave_info(pd,
(void __user *) (unsigned long)
cmd.cmd.slave_mask_addr);
break;
+ case IPATH_CMD_PIOAVAILUPD:
+ ret = ipath_force_pio_avail_update(pd->port_dd);
+ break;
}
if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 99348254502..4171198fc20 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -43,6 +43,9 @@
#include "ipath_kernel.h"
#include "ipath_registers.h"
+static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
+
+
/*
* This lists the InfiniPath registers, in the actual chip layout.
* This structure should never be directly accessed.
@@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = {
.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
/*
- * These should not be used directly via ipath_read_kreg64(),
- * use them with ipath_read_kreg64_port(),
+ * These should not be used directly via ipath_write_kreg64(),
+ * use them with ipath_write_kreg64_port(),
*/
.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
@@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
+
+/* TID entries (memory), HT-only */
+#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
+#define INFINIPATH_RT_VALID 0x8000000000000000ULL
+#define INFINIPATH_RT_ADDR_SHIFT 0
+#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
+#define INFINIPATH_RT_BUFSIZE_SHIFT 48
+
/*
* masks and bits that are different in different chips, or present only
* in one
@@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
+ << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
+
+static int ipath_ht_txe_recover(struct ipath_devdata *);
+
/**
* ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
@@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
/*
* make sure we get this much out, unless told to be quiet,
+ * it's a parity error we may recover from,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~(dd->ipath_lasthwerror |
- ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
- (ipath_debug & __IPATH_VERBDBG))
+ if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+ RXE_EAGER_PARITY)) ||
+ (ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
(hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
- if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
/*
* parity errors in send memory are recoverable,
* just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
- if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- ipath_stats.sps_txeparity++;
- ipath_dbg("Recovering from TXE parity error (%llu), "
- "hwerrstatus=%llx\n",
- (unsigned long long) ipath_stats.sps_txeparity,
- (unsigned long long) hwerrs);
- ipath_disarm_senderrbufs(dd);
- hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) { /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- return;
- }
- }
- if (hwerrs) {
- /*
- * if any set that we aren't ignoring; only
- * make the complaint once, in case it's stuck
- * or recurring, and we get here multiple
- * times.
- */
- if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Hardware Error (freeze "
- "mode), no longer usable, SN %.16s\n",
- dd->ipath_serial);
- isfatal = 1;
- }
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- /* mark as having had error */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- /*
- * mark as not usable, at a minimum until driver
- * is reloaded, probably until reboot, since no
- * other reset is possible.
- */
- dd->ipath_flags &= ~IPATH_INITTED;
- } else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
+ if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
+ hwerrs &= ~TXE_PIO_PARITY;
+ if (hwerrs & RXE_EAGER_PARITY)
+ ipath_dev_err(dd, "RXE parity, Eager TID error is not "
+ "recoverable\n");
+ if (!hwerrs) {
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
ctrl);
@@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (hwerrs) {
+ /*
+ * if any set that we aren't ignoring; only
+ * make the complaint once, in case it's stuck
+ * or recurring, and we get here multiple
+ * times.
+ * force link down, so switch knows, and
+ * LEDs are turned off
+ */
+ if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_ht_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
+ isfatal = 1;
+ }
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ /* mark as having had error */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ /*
+ * mark as not usable, at a minimum until driver
+ * is reloaded, probably until reboot, since no
+ * other reset is possible.
+ */
+ dd->ipath_flags &= ~IPATH_INITTED;
+ }
+ else
+ *msg = 0; /* recovered from all of them */
+ if (*msg)
+ ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
* for status file; if no trailing brace is copied,
@@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
if (n)
snprintf(name, namelen, "%s", n);
- if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
+ if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
+ dd->ipath_minrev > 3)) {
/*
* This version of the driver only supports Rev 3.2 and 3.3
*/
@@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
+ ipath_dbg("MemBIST corrected\n");
ipath_check_htlink(dd);
@@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
u64 __iomem *tidptr, u32 type,
unsigned long pa)
{
+ if (!dd->ipath_kregbase)
+ return;
+
if (pa != dd->ipath_tidinvalid) {
if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
dev_info(&dd->pcidev->dev,
@@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
pa |= lenvalid | INFINIPATH_RT_VALID;
}
}
- if (dd->ipath_kregbase)
- writeq(pa, tidptr);
+ writeq(pa, tidptr);
}
+
/**
* ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
* @dd: the infinipath device
@@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
INFINIPATH_S_ABORT);
ipath_get_eeprom_info(dd);
- if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
+ if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
/*
* Later production QHT7040 has same changes as QHT7140, so
@@ -1528,6 +1548,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
return 0;
}
+
+static int ipath_ht_txe_recover(struct ipath_devdata *dd)
+{
+ int cnt = ++ipath_stats.sps_txeparity;
+ if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
+ if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
+ ipath_dev_err(dd,
+ "Too many attempts to recover from "
+ "TXE parity, giving up\n");
+ return 0;
+ }
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+ ipath_disarm_senderrbufs(dd, 1);
+ return 1;
+}
+
+
/**
* ipath_init_ht_get_base_info - set chip-specific flags for user code
* @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 05918e1e7c3..1b9c3085775 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -43,6 +43,8 @@
#include "ipath_kernel.h"
#include "ipath_registers.h"
+static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
+
/*
* This file contains all the chip-specific register information and
* access functions for the QLogic InfiniPath PCI-Express chip.
@@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = {
.kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
/*
- * These should not be used directly via ipath_read_kreg64(),
- * use them with ipath_read_kreg64_port()
+ * These should not be used directly via ipath_write_kreg64(),
+ * use them with ipath_write_kreg64_port(),
*/
.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
@@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+
+static int ipath_pe_txe_recover(struct ipath_devdata *);
+
/**
* ipath_pe_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
@@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
- if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- ipath_stats.sps_txeparity++;
- ipath_dbg("Recovering from TXE parity error (%llu), "
- "hwerrstatus=%llx\n",
- (unsigned long long) ipath_stats.sps_txeparity,
- (unsigned long long) hwerrs);
- ipath_disarm_senderrbufs(dd);
- hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) { /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- return;
- }
- }
+ if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
+ hwerrs &= ~TXE_PIO_PARITY;
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
* complaint once, in case it's stuck or recurring,
* and we get here multiple times
+ * Force link down, so switch knows, and
+ * LEDs are turned off
*/
if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_pe_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
ipath_dev_err(dd, "Fatal Hardware Error (freeze "
"mode), no longer usable, SN %.16s\n",
dd->ipath_serial);
@@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (*msg)
+ ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
/*
* for /sys status file ; if no trailing } is copied, we'll
@@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
+ ipath_dbg("MemBIST corrected\n");
val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
@@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
dd->ipath_irq = 0;
}
+/*
+ * On platforms using this chip, and not having ordered WC stores, we
+ * can get TXE parity errors due to speculative reads to the PIO buffers,
+ * and this, due to a chip bug can result in (many) false parity error
+ * reports. So it's a debug print on those, and an info print on systems
+ * where the speculative reads don't occur.
+ * Because we can get lots of false errors, we have no upper limit
+ * on recovery attempts on those platforms.
+ */
+static int ipath_pe_txe_recover(struct ipath_devdata *dd)
+{
+ if (ipath_unordered_wc())
+ ipath_dbg("Recovering from TXE PIO parity error\n");
+ else {
+ int cnt = ++ipath_stats.sps_txeparity;
+ if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
+ if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
+ ipath_dev_err(dd,
+ "Too many attempts to recover from "
+ "TXE parity, giving up\n");
+ return 0;
+ }
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+ }
+ ipath_disarm_senderrbufs(dd, 1);
+ return 1;
+}
+
/**
* ipath_init_iba6120_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index d4f6b5239ef..7045ba68949 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd)
return ret;
}
+static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
+{
+ struct ipath_portdata *pd = NULL;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (pd) {
+ pd->port_dd = dd;
+ pd->port_cnt = 1;
+ /* The port 0 pkey table is used by the layer interface. */
+ pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+ }
+ return pd;
+}
+
static int init_chip_first(struct ipath_devdata *dd,
struct ipath_portdata **pdp)
{
@@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd,
goto done;
}
- dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL);
+ pd = create_portdata0(dd);
- if (!dd->ipath_pd[0]) {
+ if (!pd) {
ipath_dev_err(dd, "Unable to allocate portdata for port "
"0, failing\n");
ret = -ENOMEM;
goto done;
}
- pd = dd->ipath_pd[0];
- pd->port_dd = dd;
- pd->port_port = 0;
- pd->port_cnt = 1;
- /* The port 0 pkey table is used by the layer interface. */
- pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+ dd->ipath_pd[0] = pd;
+
dd->ipath_rcvtidcnt =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
dd->ipath_rcvtidbase =
@@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd,
goto done;
}
+
+ /* clear diagctrl register, in case diags were running and crashed */
+ ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
+
/* clear the initial reset flag, in case first driver load */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
INFINIPATH_E_RESET);
@@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
int ret = 0, i;
u32 val32, kpiobufs;
+ u32 piobufs, uports;
u64 val;
struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
@@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
* the in memory DMA'ed copies of the registers. This has to
* be done early, before we calculate lastport, etc.
*/
- val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
/*
* calc number of pioavail registers, and save it; we have 2
* bits per buffer.
*/
- dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
+ dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
+ uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
if (ipath_kpiobufs == 0) {
/* not set by user (this is default) */
- if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
+ if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
kpiobufs = 32;
else
kpiobufs = 16;
@@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
else
kpiobufs = ipath_kpiobufs;
- if (kpiobufs >
- (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
- (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
- i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
- (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
+ if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
+ i = (int) piobufs -
+ (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
if (i < 0)
i = 0;
- dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for "
- "kernel leaves too few for %d user ports "
+ dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
+ "%d for kernel leaves too few for %d user ports "
"(%d each); using %u\n", kpiobufs,
- dd->ipath_cfgports - 1,
- IPATH_MIN_USER_PORT_BUFCNT, i);
+ piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
/*
* shouldn't change ipath_kpiobufs, because could be
* different for different devices...
*/
kpiobufs = i;
}
- dd->ipath_lastport_piobuf =
- dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs;
- dd->ipath_pbufsport = dd->ipath_cfgports > 1
- ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1)
- : 0;
- val32 = dd->ipath_lastport_piobuf -
- (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
+ dd->ipath_lastport_piobuf = piobufs - kpiobufs;
+ dd->ipath_pbufsport =
+ uports ? dd->ipath_lastport_piobuf / uports : 0;
+ val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
if (val32 > 0) {
ipath_dbg("allocating %u pbufs/port leaves %u unused, "
"add to kernel\n", dd->ipath_pbufsport, val32);
@@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
"each for %u user ports\n", kpiobufs,
- dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
- dd->ipath_pbufsport, dd->ipath_cfgports - 1);
+ piobufs, dd->ipath_pbufsport, uports);
dd->ipath_f_early_init(dd);
@@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
* Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
* existing, and re-allocate.
+ * Need to re-create rest of port 0 portdata as well.
*/
if (reinit) {
- struct ipath_portdata *pd = dd->ipath_pd[0];
- dd->ipath_pd[0] = NULL;
- ipath_free_pddata(dd, pd);
+ /* Alloc and init new ipath_portdata for port0,
+ * Then free old pd. Could lead to fragmentation, but also
+ * makes later support for hot-swap easier.
+ */
+ struct ipath_portdata *npd;
+ npd = create_portdata0(dd);
+ if (npd) {
+ ipath_free_pddata(dd, pd);
+ dd->ipath_pd[0] = pd = npd;
+ } else {
+ ipath_dev_err(dd, "Unable to allocate portdata for"
+ " port 0, failing\n");
+ ret = -ENOMEM;
+ goto done;
+ }
}
dd->ipath_f_tidtemplate(dd);
ret = ipath_create_rcvhdrq(dd, pd);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 72b9e279d19..45d033169c6 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,10 +38,39 @@
#include "ipath_common.h"
/*
+ * clear (write) a pio buffer, to clear a parity error. This routine
+ * should only be called when in freeze mode, and the buffer should be
+ * canceled afterwards.
+ */
+static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
+{
+ u32 __iomem *pbuf;
+ u32 dwcnt; /* dword count to write */
+ if (pnum < dd->ipath_piobcnt2k) {
+ pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
+ dd->ipath_palign);
+ dwcnt = dd->ipath_piosize2k >> 2;
+ }
+ else {
+ pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
+ (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+ dwcnt = dd->ipath_piosize4k >> 2;
+ }
+ dev_info(&dd->pcidev->dev,
+ "Rewrite PIO buffer %u, to recover from parity error\n",
+ pnum);
+ *pbuf = dwcnt+1; /* no flush required, since already in freeze */
+ while(--dwcnt)
+ *pbuf++ = 0;
+}
+
+/*
* Called when we might have an error that is specific to a particular
* PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ * If rewrite is true, and bits are set in the sendbufferror registers,
+ * we'll write to the buffer, for error recovery on parity errors.
*/
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
{
u32 piobcnt;
unsigned long sbuf[4];
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
}
for (i = 0; i < piobcnt; i++)
- if (test_bit(i, sbuf))
+ if (test_bit(i, sbuf)) {
+ if (rewrite)
+ ipath_clrpiobuf(dd, i);
ipath_disarm_piobufs(dd, i, 1);
+ }
dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
}
}
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
u64 ignore_this_time = 0;
- ipath_disarm_senderrbufs(dd);
+ ipath_disarm_senderrbufs(dd, 0);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
@@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
* happens so often we never want to count it.
*/
if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
- ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
- ~INFINIPATH_E_IBSTATUSCHANGED);
+ int iserr;
+ iserr = ipath_decode_err(msg, sizeof msg,
+ dd->ipath_lasterror &
+ ~INFINIPATH_E_IBSTATUSCHANGED);
if (dd->ipath_lasterror &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+ ~(INFINIPATH_E_RRCVEGRFULL |
+ INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
ipath_dev_err(dd, "Suppressed %u messages for "
"fast-repeating errors (%s) (%llx)\n",
supp_msgs, msg,
@@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
* them. So only complain about these at debug
* level.
*/
- ipath_dbg("Suppressed %u messages for %s\n",
- supp_msgs, msg);
+ if (iserr)
+ ipath_dbg("Suppressed %u messages for %s\n",
+ supp_msgs, msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Suppressed %u messages for %s\n",
+ supp_msgs, msg);
}
}
}
@@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
char msg[512];
u64 ignore_this_time = 0;
- int i;
+ int i, iserr = 0;
int chkerrpkts = 0, noprint = 0;
unsigned supp_msgs;
@@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
}
if (supp_msgs == 250000) {
+ int s_iserr;
/*
* It's not entirely reasonable assuming that the errors set
* in the last clear period are all responsible for the
@@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
- ipath_decode_err(msg, sizeof msg,
+ s_iserr = ipath_decode_err(msg, sizeof msg,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
- ipath_dev_err(dd, "Disabling error(s) %llx because "
- "occurring too frequently (%s)\n",
- (unsigned long long)
- (dd->ipath_maskederrs &
- ~dd->ipath_ignorederrs), msg);
+ ~(INFINIPATH_E_RRCVEGRFULL |
+ INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+ ipath_dev_err(dd, "Temporarily disabling "
+ "error(s) %llx reporting; too frequent (%s)\n",
+ (unsigned long long) (dd->ipath_maskederrs &
+ ~dd->ipath_ignorederrs), msg);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* processing them. So only complain about
* these at debug level.
*/
- ipath_dbg("Disabling frequent queue full errors "
- "(%s)\n", msg);
+ if (s_iserr)
+ ipath_dbg("Temporarily disabling reporting "
+ "too frequent queue full errors (%s)\n",
+ msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Temporarily disabling reporting too"
+ " frequent packet errors (%s)\n",
+ msg);
}
/*
@@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
ipath_stats.sps_crcerrs++;
chkerrpkts = 1;
}
+ iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
+
/*
* We don't want to print these two as they happen, or we can make
@@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
}
- if (!noprint && *msg)
- ipath_dev_err(dd, "%s error\n", msg);
+ if (!noprint && *msg) {
+ if (iserr)
+ ipath_dev_err(dd, "%s error\n", msg);
+ else
+ dev_info(&dd->pcidev->dev, "%s packet problems\n",
+ msg);
+ }
if (dd->ipath_state_wanted & dd->ipath_flags) {
ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
"waking\n", dd->ipath_state_wanted,
@@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
struct ipath_portdata *pd = dd->ipath_pd[i];
if (portr & (1 << i) && pd && pd->port_cnt &&
test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
- int rcbit;
clear_bit(IPATH_PORT_WAITING_RCV,
&pd->port_flag);
- rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
- clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
+ clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+ &dd->ipath_rcvctrl);
wake_up_interruptible(&pd->port_wait);
rcvdint = 1;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 6d8d05fb599..e900c2593f4 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
extern int ipath_diag_inuse;
irqreturn_t ipath_intr(int irq, void *devid);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
#if __IPATH_INFO || __IPATH_DBG
extern const char *ipath_ibcstatus_str[];
#endif
@@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
#define IPATH_PORT_WAITING_RCV 2
/* waiting for a PIO buffer to be available */
#define IPATH_PORT_WAITING_PIO 3
+ /* master has not finished initializing */
+#define IPATH_PORT_MASTER_UNINIT 4
/* free up any allocated data at closes */
void ipath_free_data(struct ipath_portdata *dd);
@@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *);
void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
/*
* number of words used for protocol header if not set by ipath_userinit();
@@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
/* these are used for the registers that vary with port */
void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
unsigned, u64);
-u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
- unsigned);
/*
* We could have a single register get/put routine, that takes a group type,
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
extern unsigned ipath_debug; /* debugging bit mask */
+#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
+
const char *ipath_get_unit_name(int unit);
extern struct mutex ipath_mutex;
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 851763d7d2d..dd487c100f5 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
r = (r + 1) & (rkt->max - 1);
if (r == n) {
spin_unlock_irqrestore(&rkt->lock, flags);
- ipath_dbg(KERN_INFO "LKEY table full\n");
+ ipath_dbg("LKEY table full\n");
ret = 0;
goto bail;
}
@@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
* being reversible by calling bus_to_virt().
*/
if (sge->lkey == 0) {
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
isge->mr = NULL;
isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
@@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
* (see ipath_get_dma_mr and ipath_dma.c).
*/
if (rkey == 0) {
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
sge->mr = NULL;
sge->vaddr = (void *) vaddr;
sge->length = len;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 8cc8598d6c6..31e70732e36 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
m = 0;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) {
- for (i = 0; i < chunk->nmap; i++) {
- mr->mr.map[m]->segs[n].vaddr =
- page_address(chunk->page_list[i].page);
+ for (i = 0; i < chunk->nents; i++) {
+ void *vaddr;
+
+ vaddr = page_address(chunk->page_list[i].page);
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = region->page_size;
n++;
if (n == IPATH_SEGSZ) {
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 64f07b19349..16db9ac0b40 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -81,11 +81,51 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
-static u32 alloc_qpn(struct ipath_qp_table *qpt)
+
+static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
+{
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long flags;
+
+ /*
+ * Free the page if someone raced with us installing it.
+ */
+
+ spin_lock_irqsave(&qpt->lock, flags);
+ if (map->page)
+ free_page(page);
+ else
+ map->page = (void *)page;
+ spin_unlock_irqrestore(&qpt->lock, flags);
+}
+
+
+static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
- u32 ret;
+ u32 ret = -1;
+
+ if (type == IB_QPT_SMI)
+ ret = 0;
+ else if (type == IB_QPT_GSI)
+ ret = 1;
+
+ if (ret != -1) {
+ map = &qpt->map[0];
+ if (unlikely(!map->page)) {
+ get_map_page(qpt, map);
+ if (unlikely(!map->page)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ }
+ if (!test_and_set_bit(ret, map->page))
+ atomic_dec(&map->n_free);
+ else
+ ret = -EBUSY;
+ goto bail;
+ }
qpn = qpt->last + 1;
if (qpn >= QPN_MAX)
@@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- unsigned long page = get_zeroed_page(GFP_KERNEL);
- unsigned long flags;
-
- /*
- * Free the page if someone raced with us
- * installing it:
- */
- spin_lock_irqsave(&qpt->lock, flags);
- if (map->page)
- free_page(page);
- else
- map->page = (void *)page;
- spin_unlock_irqrestore(&qpt->lock, flags);
+ get_map_page(qpt, map);
if (unlikely(!map->page))
break;
}
@@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
qpn = mk_qpn(qpt, map, offset);
}
- ret = 0;
+ ret = -ENOMEM;
bail:
return ret;
@@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
enum ib_qp_type type)
{
unsigned long flags;
- u32 qpn;
int ret;
- if (type == IB_QPT_SMI)
- qpn = 0;
- else if (type == IB_QPT_GSI)
- qpn = 1;
- else {
- /* Allocate the next available QPN */
- qpn = alloc_qpn(qpt);
- if (qpn == 0) {
- ret = -ENOMEM;
- goto bail;
- }
- }
- qp->ibqp.qp_num = qpn;
+ ret = alloc_qpn(qpt, type);
+ if (ret < 0)
+ goto bail;
+ qp->ibqp.qp_num = ret;
/* Add the QP to the hash table. */
spin_lock_irqsave(&qpt->lock, flags);
- qpn %= qpt->max;
- qp->next = qpt->table[qpn];
- qpt->table[qpn] = qp;
+ ret %= qpt->max;
+ qp->next = qpt->table[ret];
+ qpt->table[ret] = qp;
atomic_inc(&qp->refcount);
spin_unlock_irqrestore(&qpt->lock, flags);
@@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
if (!fnd)
return;
- /* If QPN is not reserved, mark QPN free in the bitmap. */
- if (qp->ibqp.qp_num > 1)
- free_qpn(qpt, qp->ibqp.qp_num);
+ free_qpn(qpt, qp->ibqp.qp_num);
wait_event(qp->wait, !atomic_read(&qp->refcount));
}
@@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
while (qp) {
nqp = qp->next;
- if (qp->ibqp.qp_num > 1)
- free_qpn(qpt, qp->ibqp.qp_num);
+ free_qpn(qpt, qp->ibqp.qp_num);
if (!atomic_dec_and_test(&qp->refcount) ||
!ipath_destroy_qp(&qp->ibqp))
- ipath_dbg(KERN_INFO "QP memory leak!\n");
+ ipath_dbg("QP memory leak!\n");
qp = nqp;
}
}
@@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ qp->s_busy = 0;
+ qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_psn = 0;
qp->r_psn = 0;
@@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->r_state = IB_OPCODE_UC_SEND_LAST;
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
qp->r_wrid_valid = 0;
qp->s_rnr_timeout = 0;
@@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_ssn = 1;
qp->s_lsn = 0;
qp->s_wait_credit = 0;
+ memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+ qp->r_head_ack_queue = 0;
+ qp->s_tail_ack_queue = 0;
+ qp->s_num_rd_atomic = 0;
if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
@@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
* @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
- * QP s_lock should be held and interrupts disabled.
+ * The QP s_lock should be held and interrupts disabled.
*/
void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
- ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+ ipath_dbg("QP%d/%d in error state\n",
qp->ibqp.qp_num, qp->remote_qpn);
spin_lock(&dev->pending_lock);
@@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
wc.port_num = 0;
if (qp->r_wrid_valid) {
qp->r_wrid_valid = 0;
+ wc.wr_id = qp->r_wr_id;
+ wc.opcode = IB_WC_RECV;
wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
@@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->path_mig_state != IB_MIG_REARM)
goto inval;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
+ goto inval;
+
switch (new_state) {
case IB_QPS_RESET:
ipath_reset_qp(qp);
break;
case IB_QPS_ERR:
- ipath_error_qp(qp, IB_WC_GENERAL_ERR);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
@@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ qp->s_max_rd_atomic = attr->max_rd_atomic;
+
qp->state = new_state;
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
attr->sq_draining = 0;
- attr->max_rd_atomic = 1;
- attr->max_dest_rd_atomic = 1;
+ attr->max_rd_atomic = qp->s_max_rd_atomic;
+ attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer;
attr->port_num = 1;
attr->timeout = qp->timeout;
@@ -614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->recv_cq = qp->ibqp.recv_cq;
init_attr->srq = qp->ibqp.srq;
init_attr->cap = attr->cap;
- if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+ if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
else
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
- qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+ qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
else
qp->s_flags = 0;
dev = to_idev(ibpd->device);
@@ -958,7 +989,7 @@ bail:
* @wc: the WC responsible for putting the QP in this state
*
* Flushes the send work queue.
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
*/
void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
@@ -966,7 +997,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
- ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+ ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
qp->ibqp.qp_num, qp->remote_qpn, wc->status);
spin_lock(&dev->pending_lock);
@@ -984,12 +1015,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
wc->status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
+ wqe = get_swqe_ptr(qp, qp->s_last);
wc->wr_id = wqe->wr.wr_id;
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
- wqe = get_swqe_ptr(qp, qp->s_last);
}
qp->s_cur = qp->s_tail = qp->s_head;
qp->state = IB_QPS_SQE;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 5ff20cb0449..b4b88d0b53f 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -37,6 +37,19 @@
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
+static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
+ u32 psn, u32 pmtu)
+{
+ u32 len;
+
+ len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+ ss->sge = wqe->sg_list[0];
+ ss->sg_list = wqe->sg_list + 1;
+ ss->num_sge = wqe->wr.num_sge;
+ ipath_skip_sge(ss, len);
+ return wqe->length - len;
+}
+
/**
* ipath_init_restart- initialize the qp->s_sge after a restart
* @qp: the QP who's SGE we're restarting
@@ -47,15 +60,9 @@
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
struct ipath_ibdev *dev;
- u32 len;
- len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
- ib_mtu_enum_to_int(qp->path_mtu);
- qp->s_sge.sge = wqe->sg_list[0];
- qp->s_sge.sg_list = wqe->sg_list + 1;
- qp->s_sge.num_sge = wqe->wr.num_sge;
- ipath_skip_sge(&qp->s_sge, len);
- qp->s_len = wqe->length - len;
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
+ ib_mtu_enum_to_int(qp->path_mtu));
dev = to_idev(qp->ibqp.device);
spin_lock(&dev->pending_lock);
if (list_empty(&qp->timerwait))
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
* @ohdr: a pointer to the IB header being constructed
* @pmtu: the path MTU
*
- * Return bth0 if constructed; otherwise, return 0.
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
* Note the QP s_lock must be held.
*/
-u32 ipath_make_rc_ack(struct ipath_qp *qp,
- struct ipath_other_headers *ohdr,
- u32 pmtu)
+static int ipath_make_rc_ack(struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 pmtu, u32 *bth0p, u32 *bth2p)
{
+ struct ipath_ack_entry *e;
u32 hwords;
u32 len;
u32 bth0;
+ u32 bth2;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
- /*
- * Send a response. Note that we are in the responder's
- * side of the QP context.
- */
switch (qp->s_ack_state) {
- case OP(RDMA_READ_REQUEST):
- qp->s_cur_sge = &qp->s_rdma_sge;
- len = qp->s_rdma_len;
- if (len > pmtu) {
- len = pmtu;
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
- } else
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
- qp->s_rdma_len -= len;
+ case OP(RDMA_READ_RESPONSE_LAST):
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ case OP(ATOMIC_ACKNOWLEDGE):
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ /* FALLTHROUGH */
+ case OP(ACKNOWLEDGE):
+ /* Check for no next entry in the queue. */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+ if (qp->s_flags & IPATH_S_ACK_PENDING)
+ goto normal;
+ goto bail;
+ }
+
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST)) {
+ /* Copy SGE state in case we need to resend */
+ qp->s_ack_rdma_sge = e->rdma_sge;
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ len = e->rdma_sge.sge.sge_length;
+ if (len > pmtu) {
+ len = pmtu;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+ } else {
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+ if (++qp->s_tail_ack_queue >
+ IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ }
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_rdma_psn = e->psn;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+ } else {
+ /* COMPARE_SWAP or FETCH_ADD */
+ qp->s_cur_sge = NULL;
+ len = 0;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ ohdr->u.at.aeth = ipath_compute_aeth(qp);
+ ohdr->u.at.atomic_ack_eth[0] =
+ cpu_to_be32(e->atomic_data >> 32);
+ ohdr->u.at.atomic_ack_eth[1] =
+ cpu_to_be32(e->atomic_data);
+ hwords += sizeof(ohdr->u.at) / sizeof(u32);
+ bth2 = e->psn;
+ if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ }
bth0 = qp->s_ack_state << 24;
- ohdr->u.aeth = ipath_compute_aeth(qp);
- hwords++;
break;
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
- qp->s_cur_sge = &qp->s_rdma_sge;
- len = qp->s_rdma_len;
+ len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu)
len = pmtu;
else {
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
}
- qp->s_rdma_len -= len;
bth0 = qp->s_ack_state << 24;
- break;
-
- case OP(RDMA_READ_RESPONSE_LAST):
- case OP(RDMA_READ_RESPONSE_ONLY):
- /*
- * We have to prevent new requests from changing
- * the r_sge state while a ipath_verbs_send()
- * is in progress.
- */
- qp->s_ack_state = OP(ACKNOWLEDGE);
- bth0 = 0;
- goto bail;
-
- case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
- qp->s_cur_sge = NULL;
- len = 0;
- /*
- * Set the s_ack_state so the receive interrupt handler
- * won't try to send an ACK (out of order) until this one
- * is actually sent.
- */
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
- bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
- ohdr->u.at.aeth = ipath_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
- hwords += sizeof(ohdr->u.at) / 4;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
break;
default:
- /* Send a regular ACK. */
- qp->s_cur_sge = NULL;
- len = 0;
+ normal:
/*
- * Set the s_ack_state so the receive interrupt handler
- * won't try to send an ACK (out of order) until this one
- * is actually sent.
+ * Send a regular ACK.
+ * Set the s_ack_state so we wait until after sending
+ * the ACK before setting s_ack_state to ACKNOWLEDGE
+ * (see above).
*/
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
- bth0 = OP(ACKNOWLEDGE) << 24;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ qp->s_flags &= ~IPATH_S_ACK_PENDING;
+ qp->s_cur_sge = NULL;
if (qp->s_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
- (qp->s_nak_state <<
- IPATH_AETH_CREDIT_SHIFT));
+ ohdr->u.aeth =
+ cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+ (qp->s_nak_state <<
+ IPATH_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
+ len = 0;
+ bth0 = OP(ACKNOWLEDGE) << 24;
+ bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
}
qp->s_hdrwords = hwords;
qp->s_cur_size = len;
+ *bth0p = bth0;
+ *bth2p = bth2;
+ return 1;
bail:
- return bth0;
+ return 0;
}
/**
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
u32 bth2;
char newreq;
+ /* Sending responses has higher priority over sending requests. */
+ if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+ (qp->s_flags & IPATH_S_ACK_PENDING) ||
+ qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
+ ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
+ goto done;
+
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
qp->s_rnr_timeout)
- goto done;
+ goto bail;
/* Limit the number of packets sent without an ACK. */
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
list_add_tail(&qp->timerwait,
&dev->pending[dev->pending_index]);
spin_unlock(&dev->pending_lock);
- goto done;
+ goto bail;
}
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
- goto done;
+ goto bail;
+ /*
+ * If a fence is requested, wait for previous
+ * RDMA read and atomic operations to finish.
+ */
+ if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+ qp->s_num_rd_atomic) {
+ qp->s_flags |= IPATH_S_FENCE_PENDING;
+ goto bail;
+ }
wqe->psn = qp->s_next_psn;
newreq = 1;
}
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
- goto done;
+ goto bail;
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
- goto done;
+ goto bail;
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
- hwords += sizeof(struct ib_reth) / 4;
+ hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
break;
case IB_WR_RDMA_READ:
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
- ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
- ohdr->u.rc.reth.length = cpu_to_be32(len);
- qp->s_state = OP(RDMA_READ_REQUEST);
- hwords += sizeof(ohdr->u.rc.reth) / 4;
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
if (qp->s_lsn != (u32) -1)
qp->s_lsn++;
/*
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_next_psn += (len - 1) / pmtu;
wqe->lpsn = qp->s_next_psn++;
}
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
ss = NULL;
len = 0;
if (++qp->s_cur == qp->s_size)
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp,
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
- qp->s_state = OP(COMPARE_SWAP);
- else
- qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.vaddr = cpu_to_be64(
- wqe->wr.wr.atomic.remote_addr);
- ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
- hwords += sizeof(struct ib_atomic_eth) / 4;
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
if (qp->s_lsn != (u32) -1)
qp->s_lsn++;
wqe->lpsn = wqe->psn;
}
- if (++qp->s_cur == qp->s_size)
- qp->s_cur = 0;
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ qp->s_state = OP(COMPARE_SWAP);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.swap);
+ ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ } else {
+ qp->s_state = OP(FETCH_ADD);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ ohdr->u.atomic_eth.compare_data = 0;
+ }
+ ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr >> 32);
+ ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr);
+ ohdr->u.atomic_eth.rkey = cpu_to_be32(
+ wqe->wr.wr.atomic.rkey);
+ hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
break;
default:
- goto done;
+ goto bail;
}
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
@@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_psn = wqe->lpsn + 1;
else {
qp->s_psn++;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
}
/*
@@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
@@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
@@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp,
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
qp->s_state = OP(RDMA_READ_REQUEST);
- hwords += sizeof(ohdr->u.rc.reth) / 4;
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = NULL;
len = 0;
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
if (qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
-
- case OP(RDMA_READ_REQUEST):
- case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
- /*
- * We shouldn't start anything new until this request is
- * finished. The ACK will handle rescheduling us. XXX The
- * number of outstanding ones is negotiated at connection
- * setup time (see pg. 258,289)? XXX Also, if we support
- * multiple outstanding requests, we need to check the WQE
- * IB_SEND_FENCE flag and not send a new request if a RDMA
- * read or atomic is pending.
- */
- goto done;
}
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
bth2 |= 1 << 31; /* Request ACK. */
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_cur_size = len;
*bth0p = bth0 | (qp->s_state << 24);
*bth2p = bth2;
+done:
return 1;
-done:
+bail:
return 0;
}
@@ -524,7 +576,8 @@ done:
*
* This is called from ipath_rc_rcv() and only uses the receive
* side QP state.
- * Note that RDMA reads are handled in the send side QP state and tasklet.
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
*/
static void send_rc_ack(struct ipath_qp *qp)
{
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp)
struct ipath_ib_header hdr;
struct ipath_other_headers *ohdr;
+ /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+ if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
+ goto queue_ack;
+
/* Construct the header. */
ohdr = &hdr.u.oth;
lrh0 = IPATH_LRH_BTH;
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp)
lrh0 = IPATH_LRH_GRH;
}
/* read pkey_index w/o lock (its atomic) */
- bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
+ OP(ACKNOWLEDGE) << 24;
if (qp->r_nak_state)
ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
(qp->r_nak_state <<
IPATH_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = ipath_compute_aeth(qp);
- if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
- bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
- hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
- } else
- bth0 |= OP(ACKNOWLEDGE) << 24;
lrh0 |= qp->remote_ah_attr.sl << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp)
* If we can send the ACK, clear the ACK state.
*/
if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
- qp->r_ack_state = OP(ACKNOWLEDGE);
dev->n_unicast_xmit++;
- } else {
- /*
- * We are out of PIO buffers at the moment.
- * Pass responsibility for sending the ACK to the
- * send tasklet so that when a PIO buffer becomes
- * available, the ACK is sent ahead of other outgoing
- * packets.
- */
- dev->n_rc_qacks++;
- spin_lock_irq(&qp->s_lock);
- /* Don't coalesce if a RDMA read or atomic is pending. */
- if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
- qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
- qp->s_ack_state = qp->r_ack_state;
- qp->s_nak_state = qp->r_nak_state;
- qp->s_ack_psn = qp->r_ack_psn;
- qp->r_ack_state = OP(ACKNOWLEDGE);
- }
- spin_unlock_irq(&qp->s_lock);
-
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ goto done;
}
+
+ /*
+ * We are out of PIO buffers at the moment.
+ * Pass responsibility for sending the ACK to the
+ * send tasklet so that when a PIO buffer becomes
+ * available, the ACK is sent ahead of other outgoing
+ * packets.
+ */
+ dev->n_rc_qacks++;
+
+queue_ack:
+ spin_lock_irq(&qp->s_lock);
+ qp->s_flags |= IPATH_S_ACK_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+ spin_unlock_irq(&qp->s_lock);
+
+ /* Call ipath_do_rc_send() in another thread. */
+ tasklet_hi_schedule(&qp->s_task);
+
+done:
+ return;
}
/**
@@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
if (wqe->wr.opcode == IB_WR_RDMA_READ)
dev->n_rc_resends++;
else
- dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+ dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
tasklet_hi_schedule(&qp->s_task);
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
list_del_init(&qp->timerwait);
spin_unlock(&dev->pending_lock);
- /* Nothing is pending to ACK/NAK. */
- if (unlikely(qp->s_last == qp->s_tail))
- goto bail;
-
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
*/
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
(opcode != OP(RDMA_READ_RESPONSE_LAST) ||
- ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+ ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
(opcode != OP(ATOMIC_ACKNOWLEDGE) ||
@@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
*/
goto bail;
}
- if (wqe->wr.opcode == IB_WR_RDMA_READ ||
- wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- tasklet_hi_schedule(&qp->s_task);
+ if (qp->s_num_rd_atomic &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+ qp->s_num_rd_atomic--;
+ /* Restart sending task if fence is complete */
+ if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+ !qp->s_num_rd_atomic) {
+ qp->s_flags &= ~IPATH_S_FENCE_PENDING;
+ tasklet_hi_schedule(&qp->s_task);
+ } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
+ qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
+ tasklet_hi_schedule(&qp->s_task);
+ }
+ }
/* Post a send completion queue entry if requested. */
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length;
+ wc.imm_data = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
+ wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
@@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
if (qp->s_last == qp->s_cur) {
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
+ qp->s_last = qp->s_cur;
+ if (qp->s_last == qp->s_tail)
+ break;
wqe = get_swqe_ptr(qp, qp->s_cur);
qp->s_state = OP(SEND_LAST);
qp->s_psn = wqe->psn;
+ } else {
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ if (qp->s_last == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, qp->s_last);
}
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- wqe = get_swqe_ptr(qp, qp->s_last);
- if (qp->s_last == qp->s_tail)
- break;
}
switch (aeth >> 29) {
@@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
list_add_tail(&qp->timerwait,
&dev->pending[dev->pending_index]);
spin_unlock(&dev->pending_lock);
+ /*
+ * If we get a partial ACK for a resent operation,
+ * we can stop resending the earlier packets and
+ * continue with the next packet the receiver wants.
+ */
+ if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ reset_psn(qp, psn + 1);
+ tasklet_hi_schedule(&qp->s_task);
+ }
+ } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
}
ipath_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
@@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
case 1: /* RNR NAK */
dev->n_rnr_naks++;
+ if (qp->s_last == qp->s_tail)
+ goto bail;
if (qp->s_rnr_retry == 0) {
- if (qp->s_last == qp->s_tail)
- goto bail;
-
wc.status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
}
if (qp->s_rnr_retry_cnt < 7)
qp->s_rnr_retry--;
- if (qp->s_last == qp->s_tail)
- goto bail;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ dev->n_rc_resends++;
+ else
+ dev->n_rc_resends +=
+ (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
@@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
goto bail;
case 3: /* NAK */
- /* The last valid PSN seen is the previous request's. */
- if (qp->s_last != qp->s_tail)
- update_last_psn(qp, wqe->psn - 1);
+ if (qp->s_last == qp->s_tail)
+ goto bail;
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
dev->n_seq_naks++;
/*
- * Back up to the responder's expected PSN. XXX
+ * Back up to the responder's expected PSN.
* Note that we might get a NAK in the middle of an
* RDMA READ response which terminates the RDMA
* READ.
*/
- if (qp->s_last == qp->s_tail)
- break;
-
- if (ipath_cmp24(psn, wqe->psn) < 0)
- break;
-
- /* Retry the request. */
ipath_restart_rc(qp, psn, &wc);
break;
@@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
u32 psn, u32 hdrsize, u32 pmtu,
int header_in_data)
{
+ struct ipath_swqe *wqe;
unsigned long flags;
struct ib_wc wc;
int diff;
@@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto ack_done;
}
+ if (unlikely(qp->s_last == qp->s_tail))
+ goto ack_done;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+
switch (opcode) {
case OP(ACKNOWLEDGE):
case OP(ATOMIC_ACKNOWLEDGE):
@@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
aeth = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
}
- if (opcode == OP(ATOMIC_ACKNOWLEDGE))
- *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+ u64 val;
+
+ if (!header_in_data) {
+ __be32 *p = ohdr->u.at.atomic_ack_eth;
+
+ val = ((u64) be32_to_cpu(p[0]) << 32) |
+ be32_to_cpu(p[1]);
+ } else
+ val = be64_to_cpu(((__be64 *) data)[0]);
+ *(u64 *) wqe->sg_list[0].vaddr = val;
+ }
if (!do_rc_ack(qp, aeth, psn, opcode) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
hdrsize += 4;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
/*
- * do_rc_ack() has already checked the PSN so skip
- * the sequence check.
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
*/
- goto rdma_read;
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_middle;
case OP(RDMA_READ_RESPONSE_MIDDLE):
/* no AETH, no ACK */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- if (qp->s_last != qp->s_tail)
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
goto ack_done;
}
- rdma_read:
- if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
- goto ack_done;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ read_middle:
if (unlikely(tlen != (hdrsize + pmtu + 4)))
- goto ack_done;
- if (unlikely(pmtu >= qp->s_len))
- goto ack_done;
+ goto ack_len_err;
+ if (unlikely(pmtu >= qp->s_rdma_read_len))
+ goto ack_len_err;
+
/* We got a response so update the timeout. */
- if (unlikely(qp->s_last == qp->s_tail ||
- get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
- IB_WR_RDMA_READ))
- goto ack_done;
spin_lock(&dev->pending_lock);
if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
list_move_tail(&qp->timerwait,
@@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/*
* Update the RDMA receive state but do the copy w/o
* holding the locks and blocking interrupts.
- * XXX Yet another place that affects relaxed RDMA order
- * since we don't want s_sge modified.
*/
- qp->s_len -= pmtu;
+ qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- ipath_copy_sge(&qp->s_sge, data, pmtu);
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
goto bail;
- case OP(RDMA_READ_RESPONSE_LAST):
- /* ACKs READ req. */
+ case OP(RDMA_READ_RESPONSE_ONLY):
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- if (qp->s_last != qp->s_tail)
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
goto ack_done;
}
- /* FALLTHROUGH */
- case OP(RDMA_READ_RESPONSE_ONLY):
- if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
- goto ack_done;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 0 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen < (hdrsize + pad + 8)))
+ goto ack_len_err;
/*
- * Get the number of bytes the message was padded by.
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
*/
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_last;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /* ACKs READ req. */
+ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
+ dev->n_rdma_seq++;
+ ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ goto ack_done;
+ }
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for the AETH header (4) and
* ICRC (4).
*/
- if (unlikely(tlen <= (hdrsize + pad + 8))) {
- /* XXX Need to generate an error CQ entry. */
- goto ack_done;
- }
+ if (unlikely(tlen <= (hdrsize + pad + 8)))
+ goto ack_len_err;
+ read_last:
tlen -= hdrsize + pad + 8;
- if (unlikely(tlen != qp->s_len)) {
- /* XXX Need to generate an error CQ entry. */
- goto ack_done;
- }
+ if (unlikely(tlen != qp->s_rdma_read_len))
+ goto ack_len_err;
if (!header_in_data)
aeth = be32_to_cpu(ohdr->u.aeth);
else {
aeth = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
}
- ipath_copy_sge(&qp->s_sge, data, tlen);
- if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
- /*
- * Change the state so we contimue
- * processing new requests and wake up the
- * tasklet if there are posted sends.
- */
- qp->s_state = OP(SEND_LAST);
- if (qp->s_tail != qp->s_head)
- tasklet_hi_schedule(&qp->s_task);
- }
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
+ (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
goto ack_done;
}
ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
+ goto bail;
+
+ack_op_err:
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto ack_err;
+
+ack_len_err:
+ wc.status = IB_WC_LOC_LEN_ERR;
+ack_err:
+ wc.wr_id = wqe->wr.wr_id;
+ wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+ wc.vendor_err = 0;
+ wc.byte_len = 0;
+ wc.imm_data = 0;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.wc_flags = 0;
+ wc.pkey_index = 0;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ ipath_sqerror_qp(qp, &wc);
bail:
return;
}
@@ -1162,7 +1280,7 @@ bail:
* incoming RC packet for the given QP.
* Called at interrupt level.
* Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent and the s_lock unlocked.
+ * schedule a response to be sent.
*/
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
struct ipath_other_headers *ohdr,
@@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
int diff,
int header_in_data)
{
- struct ib_reth *reth;
+ struct ipath_ack_entry *e;
+ u8 i, prev;
+ int old_req;
if (diff > 0) {
/*
* Packet sequence error.
* A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or
- * NAK is pending though.
+ * Don't queue the NAK if we already sent one.
*/
- if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
- qp->r_nak_state != 0)
- goto done;
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- qp->r_ack_state = OP(SEND_ONLY);
+ if (!qp->r_nak_state) {
qp->r_nak_state = IB_NAK_PSN_ERROR;
/* Use the expected PSN. */
qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
}
- goto send_ack;
+ goto done;
}
/*
@@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* can coalesce an outstanding duplicate ACK. We have to
* send the earliest so that RDMA reads can be restarted at
* the requester's expected PSN.
+ *
+ * First, find where this duplicate PSN falls within the
+ * ACKs previously sent.
*/
- if (opcode == OP(RDMA_READ_REQUEST)) {
+ psn &= IPATH_PSN_MASK;
+ e = NULL;
+ old_req = 1;
+ spin_lock_irq(&qp->s_lock);
+ for (i = qp->r_head_ack_queue; ; i = prev) {
+ if (i == qp->s_tail_ack_queue)
+ old_req = 0;
+ if (i)
+ prev = i - 1;
+ else
+ prev = IPATH_MAX_RDMA_ATOMIC;
+ if (prev == qp->r_head_ack_queue) {
+ e = NULL;
+ break;
+ }
+ e = &qp->s_ack_queue[prev];
+ if (!e->opcode) {
+ e = NULL;
+ break;
+ }
+ if (ipath_cmp24(psn, e->psn) >= 0)
+ break;
+ }
+ switch (opcode) {
+ case OP(RDMA_READ_REQUEST): {
+ struct ib_reth *reth;
+ u32 offset;
+ u32 len;
+
+ /*
+ * If we didn't find the RDMA read request in the ack queue,
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
+ */
+ if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
+ goto unlock_done;
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
@@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
data += sizeof(*reth);
}
/*
- * If we receive a duplicate RDMA request, it means the
- * requester saw a sequence error and needs to restart
- * from an earlier point. We can abort the current
- * RDMA read send in that case.
+ * Address range must be a subset of the original
+ * request and start on pmtu boundaries.
+ * We reuse the old ack_queue slot since the requester
+ * should not back up and request an earlier PSN for the
+ * same request.
*/
- spin_lock_irq(&qp->s_lock);
- if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
- (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
- /*
- * We are already sending earlier requested data.
- * Don't abort it to send later out of sequence data.
- */
- spin_unlock_irq(&qp->s_lock);
- goto done;
- }
- qp->s_rdma_len = be32_to_cpu(reth->length);
- if (qp->s_rdma_len != 0) {
+ offset = ((psn - e->psn) & IPATH_PSN_MASK) *
+ ib_mtu_enum_to_int(qp->path_mtu);
+ len = be32_to_cpu(reth->length);
+ if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
+ goto unlock_done;
+ if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
- /*
- * Address range must be a subset of the original
- * request and start on pmtu boundaries.
- */
- ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
- qp->s_rdma_len, vaddr, rkey,
+ ok = ipath_rkey_ok(qp, &e->rdma_sge,
+ len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
- if (unlikely(!ok)) {
- spin_unlock_irq(&qp->s_lock);
- goto done;
- }
+ if (unlikely(!ok))
+ goto unlock_done;
} else {
- qp->s_rdma_sge.sg_list = NULL;
- qp->s_rdma_sge.num_sge = 0;
- qp->s_rdma_sge.sge.mr = NULL;
- qp->s_rdma_sge.sge.vaddr = NULL;
- qp->s_rdma_sge.sge.length = 0;
- qp->s_rdma_sge.sge.sge_length = 0;
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
}
- qp->s_ack_state = opcode;
- qp->s_ack_psn = psn;
- spin_unlock_irq(&qp->s_lock);
- tasklet_hi_schedule(&qp->s_task);
- goto send_ack;
+ e->psn = psn;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
}
- /*
- * A pending RDMA read will ACK anything before it so
- * ignore earlier duplicate requests.
- */
- if (qp->s_ack_state != OP(ACKNOWLEDGE))
- goto done;
-
- /*
- * If an ACK is pending, don't replace the pending ACK
- * with an earlier one since the later one will ACK the earlier.
- * Also, if we already have a pending atomic, send it.
- */
- if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
- (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
- qp->r_ack_state >= OP(COMPARE_SWAP)))
- goto send_ack;
- switch (opcode) {
case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
+ case OP(FETCH_ADD): {
/*
- * Check for the PSN of the last atomic operation
- * performed and resend the result if found.
+ * If we didn't find the atomic request in the ack queue
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
*/
- if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn)
- goto done;
+ if (!e || e->opcode != (u8) opcode || old_req)
+ goto unlock_done;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ default:
+ if (old_req)
+ goto unlock_done;
+ /*
+ * Resend the most recent ACK if this request is
+ * after all the previous RDMA reads and atomics.
+ */
+ if (i == qp->r_head_ack_queue) {
+ spin_unlock_irq(&qp->s_lock);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->r_psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Resend the RDMA read or atomic op which
+ * ACKs this duplicate request.
+ */
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = i;
break;
}
- qp->r_ack_state = opcode;
qp->r_nak_state = 0;
- qp->r_ack_psn = psn;
-send_ack:
- return 0;
+ spin_unlock_irq(&qp->s_lock);
+ tasklet_hi_schedule(&qp->s_task);
+unlock_done:
+ spin_unlock_irq(&qp->s_lock);
done:
return 1;
+
+send_ack:
+ return 0;
}
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
@@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
nack_inv:
- /*
- * A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or NAK
- * is pending though.
- */
- if (qp->r_ack_state >= OP(COMPARE_SWAP))
- goto send_ack;
ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
- qp->r_ack_state = OP(SEND_ONLY);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
@@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* Don't queue the NAK if a RDMA read or atomic
* is pending though.
*/
- if (qp->r_ack_state >= OP(COMPARE_SWAP))
- goto send_ack;
- qp->r_ack_state = OP(SEND_ONLY);
+ if (qp->r_nak_state)
+ goto done;
qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
@@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto rnr_nak;
goto send_last_imm;
- case OP(RDMA_READ_REQUEST):
+ case OP(RDMA_READ_REQUEST): {
+ struct ipath_ack_entry *e;
+ u32 len;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto nack_acc;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ if (unlikely(next == qp->s_tail_ack_queue))
+ goto nack_inv;
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
@@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
reth = (struct ib_reth *)data;
data += sizeof(*reth);
}
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto nack_acc;
- spin_lock_irq(&qp->s_lock);
- qp->s_rdma_len = be32_to_cpu(reth->length);
- if (qp->s_rdma_len != 0) {
+ len = be32_to_cpu(reth->length);
+ if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
- qp->s_rdma_len, vaddr, rkey,
- IB_ACCESS_REMOTE_READ);
- if (unlikely(!ok)) {
- spin_unlock_irq(&qp->s_lock);
+ ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
goto nack_acc;
- }
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (qp->s_rdma_len > pmtu)
- qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
+ if (len > pmtu)
+ qp->r_psn += (len - 1) / pmtu;
} else {
- qp->s_rdma_sge.sg_list = NULL;
- qp->s_rdma_sge.num_sge = 0;
- qp->s_rdma_sge.sge.mr = NULL;
- qp->s_rdma_sge.sge.vaddr = NULL;
- qp->s_rdma_sge.sge.length = 0;
- qp->s_rdma_sge.sge.sge_length = 0;
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
}
+ e->opcode = opcode;
+ e->psn = psn;
/*
* We need to increment the MSN here instead of when we
* finish sending the result since a duplicate request would
* increment it more than once.
*/
qp->r_msn++;
-
- qp->s_ack_state = opcode;
- qp->s_ack_psn = psn;
- spin_unlock_irq(&qp->s_lock);
-
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
+ barrier();
+ qp->r_head_ack_queue = next;
/* Call ipath_do_rc_send() in another thread. */
tasklet_hi_schedule(&qp->s_task);
goto done;
+ }
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth;
+ struct ipath_ack_entry *e;
u64 vaddr;
+ atomic64_t *maddr;
u64 sdata;
u32 rkey;
+ u8 next;
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_acc;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ if (unlikely(next == qp->s_tail_ack_queue))
+ goto nack_inv;
if (!header_in_data)
ateth = &ohdr->u.atomic_eth;
- else {
+ else
ateth = (struct ib_atomic_eth *)data;
- data += sizeof(*ateth);
- }
- vaddr = be64_to_cpu(ateth->vaddr);
+ vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+ be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv;
rkey = be32_to_cpu(ateth->rkey);
@@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc;
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
/* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data);
- spin_lock_irq(&dev->pending_lock);
- qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
- if (opcode == OP(FETCH_ADD))
- *(u64 *) qp->r_sge.sge.vaddr =
- qp->r_atomic_data + sdata;
- else if (qp->r_atomic_data ==
- be64_to_cpu(ateth->compare_data))
- *(u64 *) qp->r_sge.sge.vaddr = sdata;
- spin_unlock_irq(&dev->pending_lock);
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ be64_to_cpu(ateth->compare_data),
+ sdata);
+ e->opcode = opcode;
+ e->psn = psn & IPATH_PSN_MASK;
qp->r_msn++;
- qp->r_atomic_psn = psn & IPATH_PSN_MASK;
- psn |= 1 << 31;
- break;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ barrier();
+ qp->r_head_ack_queue = next;
+
+ /* Call ipath_do_rc_send() in another thread. */
+ tasklet_hi_schedule(&qp->s_task);
+
+ goto done;
}
default:
- /* Drop packet for unknown opcodes. */
- goto done;
+ /* NAK unknown opcodes. */
+ goto nack_inv;
}
qp->r_psn++;
qp->r_state = opcode;
+ qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
- if (psn & (1 << 31)) {
- /*
- * Coalesce ACKs unless there is a RDMA READ or
- * ATOMIC pending.
- */
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- qp->r_ack_state = opcode;
- qp->r_ack_psn = psn;
- }
+ if (psn & (1 << 31))
goto send_ack;
- }
goto done;
nack_acc:
- /*
- * A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or NAK
- * is pending though.
- */
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
- qp->r_ack_state = OP(RDMA_WRITE_ONLY);
- qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
- qp->r_ack_psn = qp->r_psn;
- }
+ ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+
send_ack:
- /* Send ACK right away unless the send tasklet has a pending ACK. */
- if (qp->s_ack_state == OP(ACKNOWLEDGE))
- send_rc_ack(qp);
+ send_rc_ack(qp);
done:
return;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index dffc76016d3..c182bcd6209 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,9 +126,18 @@
#define INFINIPATH_E_RESET 0x0004000000000000ULL
#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+ | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+ | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+ | INFINIPATH_E_REBP )
+
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID
+ * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
* bit 4: flag buffer, 5: datainfo, 6: header info */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
@@ -143,8 +152,8 @@
/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
@@ -299,13 +308,6 @@
#define INFINIPATH_XGXS_RX_POL_SHIFT 19
#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
/*
* IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index e86cb171872..d9c2a9b15d8 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wq->tail = tail;
ret = 1;
+ qp->r_wrid_valid = 1;
if (handler) {
u32 n;
@@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
}
spin_unlock_irqrestore(&rq->lock, flags);
- qp->r_wrid_valid = 1;
bail:
return ret;
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
unsigned long flags;
struct ib_wc wc;
u64 sdata;
+ atomic64_t *maddr;
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
if (!qp) {
@@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
again:
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
+ qp->s_rnr_timeout) {
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
@@ -310,7 +312,7 @@ again:
sqp->s_rnr_retry--;
dev->n_rnr_naks++;
sqp->s_rnr_timeout =
- ib_ipath_rnr_table[sqp->r_min_rnr_timer];
+ ib_ipath_rnr_table[qp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto done;
}
@@ -343,20 +345,22 @@ again:
wc.sl = sqp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
+ spin_lock_irqsave(&sqp->s_lock, flags);
ipath_sqerror_qp(sqp, &wc);
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
break;
case IB_WR_RDMA_READ:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_READ)))
+ goto acc_err;
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -364,22 +368,22 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto acc_err;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->wr.wr.atomic.remote_addr,
+ wqe->wr.wr.atomic.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
- sdata = wqe->wr.wr.atomic.swap;
- spin_lock_irqsave(&dev->pending_lock, flags);
- qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
- if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- *(u64 *) qp->r_sge.sge.vaddr =
- qp->r_atomic_data + sdata;
- else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
- *(u64 *) qp->r_sge.sge.vaddr = sdata;
- spin_unlock_irqrestore(&dev->pending_lock, flags);
- *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = wqe->wr.wr.atomic.compare_add;
+ *(u64 *) sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ sdata, wqe->wr.wr.atomic.swap);
goto send_comp;
default:
@@ -440,7 +444,7 @@ again:
send_comp:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
+ if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
@@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
* We clear the tasklet flag now since we are committing to return
* from the tasklet function.
*/
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ clear_bit(IPATH_S_BUSY, &qp->s_busy);
tasklet_unlock(&qp->s_task);
want_buffer(dev->dd);
dev->n_piowait++;
@@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
wr->sg_list[0].addr & (sizeof(u64) - 1))) {
ret = -EINVAL;
goto bail;
+ } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+ ret = -EINVAL;
+ goto bail;
}
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge) {
@@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data)
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
struct ipath_other_headers *ohdr;
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
+ if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
goto bail;
if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
@@ -683,19 +690,15 @@ again:
*/
spin_lock_irqsave(&qp->s_lock, flags);
- /* Sending responses has higher priority over sending requests. */
- if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE &&
- (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
- bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
- else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
- ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
- ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
+ if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
+ ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
+ ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
/*
* Clear the busy bit before unlocking to avoid races with
* adding new work queue items and then failing to process
* them.
*/
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ clear_bit(IPATH_S_BUSY, &qp->s_busy);
spin_unlock_irqrestore(&qp->s_lock, flags);
goto bail;
}
@@ -728,7 +731,7 @@ again:
goto again;
clear:
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ clear_bit(IPATH_S_BUSY, &qp->s_busy);
bail:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fc..9307f7187ca 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
* don't access the chip while running diags, or memory diags can
* fail
*/
- if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
ipath_diag_inuse)
/* but re-arm the timer, for diags case; won't hurt other */
goto done;
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
&& time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256];
- ipath_decode_err(ebuf, sizeof ebuf,
+ int iserr;
+ iserr = ipath_decode_err(ebuf, sizeof ebuf,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+ ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_PKTERRS ))
ipath_dev_err(dd, "Re-enabling masked errors "
"(%s)\n", ebuf);
else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
* them. So only complain about these at debug
* level.
*/
- ipath_dbg("Disabling frequent queue full errors "
- "(%s)\n", ebuf);
+ if (iserr)
+ ipath_dbg("Re-enabling queue full errors (%s)\n",
+ ebuf);
+ else
+ ipath_cdbg(ERRPKT, "Re-enabling packet"
+ " problem interrupt (%s)\n", ebuf);
}
dd->ipath_maskederrs = dd->ipath_ignorederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 325d6634ff5..1c2b03c2ef5 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
{
if (++qp->s_last == qp->s_size)
qp->s_last = 0;
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc->wr_id = wqe->wr.wr_id;
wc->status = IB_WC_SUCCESS;
@@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
send_first:
if (qp->r_reuse_sge) {
qp->r_reuse_sge = 0;
- qp->r_sge = qp->s_rdma_sge;
+ qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
goto done;
}
/* Save the WQE so we can reuse it in case of an error. */
- qp->s_rdma_sge = qp->r_sge;
+ qp->s_rdma_read_sge = qp->r_sge;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto send_last;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 9a3e54664ee..a518f7c8fa8 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
goto bail;
}
+ if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
+ ret = -EPERM;
+ goto bail;
+ }
+
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge) {
ret = -EINVAL;
@@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
done:
/* Queue the completion status entry. */
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wr->send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wr->wr_id;
wc.status = IB_WC_SUCCESS;
@@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
+ qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2aaacdb7e52..18c6df2052c 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
struct ipath_mcast *mcast;
struct ipath_mcast_qp *p;
+ if (lnh != IPATH_LRH_GRH) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
if (mcast == NULL) {
dev->n_pkt_drops++;
@@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
}
dev->n_multicast_rcv++;
list_for_each_entry_rcu(p, &mcast->qp_list, list)
- ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
- tlen, p->qp);
+ ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
/*
* Notify ipath_multicast_detach() if it is waiting for us
* to finish.
@@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
/* +1 is for the qword padding of pbc */
plen = hdrwords + ((len + 3) >> 2) + 1;
if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
- ipath_dbg("packet len 0x%x too long, failing\n", plen);
ret = -EINVAL;
goto bail;
}
@@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev,
props->max_cqe = ib_ipath_max_cqes;
props->max_mr = dev->lk_table.max;
props->max_pd = ib_ipath_max_pds;
- props->max_qp_rd_atom = 1;
- props->max_qp_init_rd_atom = 1;
+ props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
+ props->max_qp_init_rd_atom = 255;
/* props->max_res_rd_atom */
props->max_srq = ib_ipath_max_srqs;
props->max_srq_wr = ib_ipath_max_srq_wrs;
props->max_srq_sge = ib_ipath_max_srq_sges;
/* props->local_ca_ack_delay */
- props->atomic_cap = IB_ATOMIC_HCA;
+ props->atomic_cap = IB_ATOMIC_GLOB;
props->max_pkeys = ipath_get_npkeys(dev->dd);
props->max_mcast_grp = ib_ipath_max_mcast_grps;
props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
@@ -1557,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
dev->dma_device = &dd->pcidev->dev;
- dev->class_dev.dev = dev->dma_device;
dev->query_device = ipath_query_device;
dev->modify_device = ipath_modify_device;
dev->query_port = ipath_query_port;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index c0c8d5b24a7..7c4929f1cb5 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -40,9 +40,12 @@
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
#include "ipath_layer.h"
+#define IPATH_MAX_RDMA_ATOMIC 4
+
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -89,7 +92,7 @@ struct ib_reth {
} __attribute__ ((packed));
struct ib_atomic_eth {
- __be64 vaddr;
+ __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
@@ -108,7 +111,7 @@ struct ipath_other_headers {
} rc;
struct {
__be32 aeth;
- __be64 atomic_ack_eth;
+ __be32 atomic_ack_eth[2];
} at;
__be32 imm_data;
__be32 aeth;
@@ -186,7 +189,7 @@ struct ipath_mmap_info {
struct ipath_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
- struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
};
/*
@@ -312,6 +315,19 @@ struct ipath_sge_state {
};
/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct ipath_ack_entry {
+ u8 opcode;
+ u32 psn;
+ union {
+ struct ipath_sge_state rdma_sge;
+ u64 atomic_data;
+ };
+};
+
+/*
* Variables prefixed with s_ are for the requester (sender).
* Variables prefixed with r_ are for the responder (receiver).
* Variables prefixed with ack_ are for responder replies.
@@ -333,24 +349,24 @@ struct ipath_qp {
struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
struct ipath_sge_state s_sge; /* current send request data */
- /* current RDMA read send data */
- struct ipath_sge_state s_rdma_sge;
+ struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
+ struct ipath_sge_state s_ack_rdma_sge;
+ struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
- unsigned long s_flags;
+ unsigned long s_busy;
u32 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
- u32 s_rdma_len; /* total length of s_rdma_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */
u32 s_psn; /* current packet sequence number */
- u32 s_ack_psn; /* PSN for RDMA_READ */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
- u64 r_atomic_data; /* data for last atomic op */
- u32 r_atomic_psn; /* PSN of last atomic op */
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
@@ -360,12 +376,13 @@ struct ipath_qp {
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
u8 r_state; /* opcode of last packet received */
- u8 r_ack_state; /* opcode of packet to ACK */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
@@ -374,6 +391,10 @@ struct ipath_qp {
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+ u8 s_flags;
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
@@ -390,11 +411,16 @@ struct ipath_qp {
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
+/* Bit definition for s_busy. */
+#define IPATH_S_BUSY 0
+
/*
* Bit definitions for s_flags.
*/
-#define IPATH_S_BUSY 0
-#define IPATH_S_SIGNAL_REQ_WR 1
+#define IPATH_S_SIGNAL_REQ_WR 0x01
+#define IPATH_S_FENCE_PENDING 0x02
+#define IPATH_S_RDMAR_PENDING 0x04
+#define IPATH_S_ACK_PENDING 0x08
#define IPATH_PSN_CREDIT 2048
@@ -706,8 +732,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
int ipath_destroy_srq(struct ib_srq *ibsrq);
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
@@ -757,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
void ipath_do_ruc_send(unsigned long data);
-u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
- u32 pmtu);
-
int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p);