summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/amso1100/c2_rnic.c5
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c134
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h17
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c36
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h103
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c8
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c15
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c75
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c271
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h16
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c276
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h3
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c9
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c21
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c20
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c10
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.h1
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c158
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c95
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba7220.c30
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c95
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c80
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h23
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c10
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c237
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c295
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c336
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sdma.c62
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c69
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c74
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c185
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h64
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c3
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c29
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c3
-rw-r--r--drivers/infiniband/hw/mlx4/main.c19
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h19
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c71
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c163
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c1
-rw-r--r--drivers/infiniband/hw/mlx4/user.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c17
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_config_reg.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_doorbell.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c16
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mcg.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c28
-rw-r--r--drivers/infiniband/hw/mthca/mthca_pd.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c32
-rw-r--r--drivers/infiniband/hw/mthca/mthca_reset.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_uar.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_wqe.h2
-rw-r--r--drivers/infiniband/hw/nes/nes.c10
-rw-r--r--drivers/infiniband/hw/nes/nes.h10
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c2035
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h23
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c83
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c33
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c226
84 files changed, 3344 insertions, 2399 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index 9a054c6941a..dd05c483564 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -454,9 +454,8 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
(IB_DEVICE_RESIZE_MAX_WR |
IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_ZERO_STAG |
- IB_DEVICE_MEM_WINDOW |
- IB_DEVICE_SEND_W_INV);
+ IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW);
/* Allocate the qptr_array */
c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index ed2ee4ba4b7..f6d5747153a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -145,7 +145,9 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
}
wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
+ T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
+ T3_SOPEOP);
wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
sge_cmd = qpid << 8 | 3;
wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -276,7 +278,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
if (!wq->qpid)
return -ENOMEM;
- wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL);
+ wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL);
if (!wq->rq)
goto err1;
@@ -300,6 +302,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
if (!kernel_domain)
wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
(wq->qpid << rdev_p->qpshift);
+ wq->rdev = rdev_p;
PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
return 0;
@@ -359,9 +362,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
cq->sw_wptr++;
}
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
u32 ptr;
+ int flushed = 0;
PDBG("%s wq %p cq %p\n", __func__, wq, cq);
@@ -369,8 +373,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
- while (ptr++ != wq->rq_wptr)
+ while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
+ flushed++;
+ }
+ return flushed;
}
static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
@@ -394,18 +401,21 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
cq->sw_wptr++;
}
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
__u32 ptr;
+ int flushed = 0;
struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
ptr = wq->sq_rptr + count;
- sqp += count;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
while (ptr != wq->sq_wptr) {
insert_sq_cqe(wq, cq, sqp);
- sqp++;
ptr++;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ flushed++;
}
+ return flushed;
}
/*
@@ -551,7 +561,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof(*wqe));
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
- T3_CTL_QP_TID, 7);
+ T3_CTL_QP_TID, 7, T3_SOPEOP);
wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -581,7 +591,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
* caller aquires the ctrl_qp lock before the call
*/
static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
- u32 len, void *data, int completion)
+ u32 len, void *data)
{
u32 i, nr_wqe, copy_len;
u8 *copy_data;
@@ -617,7 +627,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
flag = 0;
if (i == (nr_wqe - 1)) {
/* last WQE */
- flag = completion ? T3_COMPLETION_FLAG : 0;
+ flag = T3_COMPLETION_FLAG;
if (len % 32)
utx_len = len / 32 + 1;
else
@@ -667,7 +677,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
Q_GENBIT(rdev_p->ctrl_qp.wptr,
T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
- wr_len);
+ wr_len, T3_SOPEOP);
if (flag == T3_COMPLETION_FLAG)
ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
len -= 96;
@@ -676,21 +686,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
return 0;
}
-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
- * OUT: stag index, actual pbl_size, pbl_addr allocated.
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
+ * OUT: stag index
* TBD: shared memory region support
*/
static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
u32 *stag, u8 stag_state, u32 pdid,
enum tpt_mem_type type, enum tpt_mem_perm perm,
- u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
- u32 *pbl_size, u32 *pbl_addr)
+ u32 zbva, u64 to, u32 len, u8 page_size,
+ u32 pbl_size, u32 pbl_addr)
{
int err;
struct tpt_entry tpt;
u32 stag_idx;
u32 wptr;
- int rereg = (*stag != T3_STAG_UNSET);
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -704,30 +713,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
__func__, stag_state, type, pdid, stag_idx);
- if (reset_tpt_entry)
- cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
- else if (!rereg) {
- *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
- if (!*pbl_addr) {
- return -ENOMEM;
- }
- }
-
mutex_lock(&rdev_p->ctrl_qp.lock);
- /* write PBL first if any - update pbl only if pbl list exist */
- if (pbl) {
-
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, *pbl_addr, rdev_p->rnic_info.pbl_base,
- *pbl_size);
- err = cxio_hal_ctrl_qp_write_mem(rdev_p,
- (*pbl_addr >> 5),
- (*pbl_size << 3), pbl, 0);
- if (err)
- goto ret;
- }
-
/* write TPT entry */
if (reset_tpt_entry)
memset(&tpt, 0, sizeof(tpt));
@@ -742,23 +729,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+ cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = cpu_to_be32(len);
tpt.va_hi = cpu_to_be32((u32) (to >> 32));
tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+ cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
(rdev_p->rnic_info.tpt_base >> 5),
- sizeof(tpt), &tpt, 1);
+ sizeof(tpt), &tpt);
/* release the stag index to free pool */
if (reset_tpt_entry)
cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-ret:
+
wptr = rdev_p->ctrl_qp.wptr;
mutex_unlock(&rdev_p->ctrl_qp.lock);
if (!err)
@@ -769,44 +756,74 @@ ret:
return err;
}
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size)
+{
+ u32 wptr;
+ int err;
+
+ PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+ pbl_size);
+
+ mutex_lock(&rdev_p->ctrl_qp.lock);
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
+ pbl);
+ wptr = rdev_p->ctrl_qp.wptr;
+ mutex_unlock(&rdev_p->ctrl_qp.lock);
+ if (err)
+ return err;
+
+ if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+ SEQ32_GE(rdev_p->ctrl_qp.rptr,
+ wptr)))
+ return -ERESTARTSYS;
+
+ return 0;
+}
+
int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr)
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
{
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+ zbva, to, len, page_size, pbl_size, pbl_addr);
}
int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr)
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
{
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+ zbva, to, len, page_size, pbl_size, pbl_addr);
}
int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
u32 pbl_addr)
{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
- &pbl_size, &pbl_addr);
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ pbl_size, pbl_addr);
}
int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
{
- u32 pbl_size = 0;
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
- NULL, &pbl_size, NULL);
+ 0, 0);
}
int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
- NULL, NULL);
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ 0, 0);
+}
+
+int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
+{
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
+ 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
}
int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
@@ -1250,13 +1267,16 @@ proc_cqe:
wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
PDBG("%s completing sq idx %ld\n", __func__,
Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
- *cookie = (wq->sq +
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
+ *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
wq->sq_rptr++;
} else {
PDBG("%s completing rq idx %ld\n", __func__,
Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
- *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
+ if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
+ cxio_hal_pblpool_free(wq->rdev,
+ wq->rq[Q_PTR2IDX(wq->rq_rptr,
+ wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
wq->rq_rptr++;
}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 2bcff7f5046..656fe47bc84 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -45,15 +45,17 @@
#define T3_CTRL_QP_SIZE_LOG2 8
#define T3_CTRL_CQ_ID 0
-/* TBD */
#define T3_MAX_NUM_RI (1<<15)
#define T3_MAX_NUM_QP (1<<15)
#define T3_MAX_NUM_CQ (1<<15)
#define T3_MAX_NUM_PD (1<<15)
#define T3_MAX_PBL_SIZE 256
#define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
+#define T3_MAX_CQ_DEPTH 8192
#define T3_MAX_NUM_STAG (1<<15)
#define T3_MAX_MR_SIZE 0x100000000ULL
+#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
#define T3_STAG_UNSET 0xffffffff
@@ -154,17 +156,18 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
struct cxio_ucontext *uctx);
int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size);
int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr);
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr);
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
u32 pbl_addr);
int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
@@ -173,8 +176,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
int __init cxio_hal_init(void);
void __exit cxio_hal_exit(void);
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_flush_hw_cq(struct t3_cq *cq);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 45ed4f25ef7..bd233c08765 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -250,7 +250,6 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
*/
#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
-#define PBL_CHUNK 2*1024*1024
u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
{
@@ -267,14 +266,35 @@ void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
{
- unsigned long i;
+ unsigned pbl_start, pbl_chunk;
+
rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
- if (rdev_p->pbl_pool)
- for (i = rdev_p->rnic_info.pbl_base;
- i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
- i += PBL_CHUNK)
- gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
- return rdev_p->pbl_pool ? 0 : -ENOMEM;
+ if (!rdev_p->pbl_pool)
+ return -ENOMEM;
+
+ pbl_start = rdev_p->rnic_info.pbl_base;
+ pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
+
+ while (pbl_start < rdev_p->rnic_info.pbl_top) {
+ pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
+ pbl_chunk);
+ if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
+ PDBG("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
+ printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
+ __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
+ return 0;
+ }
+ pbl_chunk >>= 1;
+ } else {
+ PDBG("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ pbl_start += pbl_chunk;
+ }
+ }
+
+ return 0;
}
void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index f1a25a821a4..04618f7bfbb 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -39,6 +39,9 @@
#define T3_MAX_SGE 4
#define T3_MAX_INLINE 64
+#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
+#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
+#define T3_STAG0_PAGE_SHIFT 15
#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
@@ -72,7 +75,8 @@ enum t3_wr_opcode {
T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
- T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP
+ T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
+ T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
} __attribute__ ((packed));
enum t3_rdma_opcode {
@@ -89,7 +93,8 @@ enum t3_rdma_opcode {
T3_FAST_REGISTER,
T3_LOCAL_INV,
T3_QP_MOD,
- T3_BYPASS
+ T3_BYPASS,
+ T3_RDMA_READ_REQ_WITH_INV,
} __attribute__ ((packed));
static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
@@ -103,6 +108,7 @@ static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
case T3_WR_BIND: return T3_BIND_MW;
case T3_WR_INIT: return T3_RDMA_INIT;
case T3_WR_QP_MOD: return T3_QP_MOD;
+ case T3_WR_FASTREG: return T3_FAST_REGISTER;
default: break;
}
return -1;
@@ -170,11 +176,54 @@ struct t3_send_wr {
struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
};
+#define T3_MAX_FASTREG_DEPTH 24
+#define T3_MAX_FASTREG_FRAG 10
+
+struct t3_fastreg_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 stag; /* 2 */
+ __be32 len;
+ __be32 va_base_hi; /* 3 */
+ __be32 va_base_lo_fbo;
+ __be32 page_type_perms; /* 4 */
+ __be32 reserved1;
+ __be64 pbl_addrs[0]; /* 5+ */
+};
+
+/*
+ * If a fastreg wr spans multiple wqes, then the 2nd fragment looks like this.
+ */
+struct t3_pbl_frag {
+ struct fw_riwrh wrh; /* 0 */
+ __be64 pbl_addrs[14]; /* 1..14 */
+};
+
+#define S_FR_PAGE_COUNT 24
+#define M_FR_PAGE_COUNT 0xff
+#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT)
+#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
+
+#define S_FR_PAGE_SIZE 16
+#define M_FR_PAGE_SIZE 0x1f
+#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE)
+#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
+
+#define S_FR_TYPE 8
+#define M_FR_TYPE 0x1
+#define V_FR_TYPE(x) ((x) << S_FR_TYPE)
+#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE)
+
+#define S_FR_PERMS 0
+#define M_FR_PERMS 0xff
+#define V_FR_PERMS(x) ((x) << S_FR_PERMS)
+#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS)
+
struct t3_local_inv_wr {
struct fw_riwrh wrh; /* 0 */
union t3_wrid wrid; /* 1 */
__be32 stag; /* 2 */
- __be32 reserved3;
+ __be32 reserved;
};
struct t3_rdma_write_wr {
@@ -193,7 +242,8 @@ struct t3_rdma_read_wr {
struct fw_riwrh wrh; /* 0 */
union t3_wrid wrid; /* 1 */
u8 rdmaop; /* 2 */
- u8 reserved[3];
+ u8 local_inv;
+ u8 reserved[2];
__be32 rem_stag;
__be64 rem_to; /* 3 */
__be32 local_stag; /* 4 */
@@ -201,18 +251,6 @@ struct t3_rdma_read_wr {
__be64 local_to; /* 5 */
};
-enum t3_addr_type {
- T3_VA_BASED_TO = 0x0,
- T3_ZERO_BASED_TO = 0x1
-} __attribute__ ((packed));
-
-enum t3_mem_perms {
- T3_MEM_ACCESS_LOCAL_READ = 0x1,
- T3_MEM_ACCESS_LOCAL_WRITE = 0x2,
- T3_MEM_ACCESS_REM_READ = 0x4,
- T3_MEM_ACCESS_REM_WRITE = 0x8
-} __attribute__ ((packed));
-
struct t3_bind_mw_wr {
struct fw_riwrh wrh; /* 0 */
union t3_wrid wrid; /* 1 */
@@ -336,6 +374,11 @@ struct t3_genbit {
__be64 genbit;
};
+struct t3_wq_in_err {
+ u64 flit[13];
+ u64 err;
+};
+
enum rdma_init_wr_flags {
MPA_INITIATOR = (1<<0),
PRIV_QP = (1<<1),
@@ -346,13 +389,16 @@ union t3_wr {
struct t3_rdma_write_wr write;
struct t3_rdma_read_wr read;
struct t3_receive_wr recv;
+ struct t3_fastreg_wr fastreg;
+ struct t3_pbl_frag pbl_frag;
struct t3_local_inv_wr local_inv;
struct t3_bind_mw_wr bind;
struct t3_bypass_wr bypass;
struct t3_rdma_init_wr init;
struct t3_modify_qp_wr qp_mod;
struct t3_genbit genbit;
- u64 flit[16];
+ struct t3_wq_in_err wq_in_err;
+ __be64 flit[16];
};
#define T3_SQ_CQE_FLIT 13
@@ -366,12 +412,18 @@ static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
}
+enum t3_wr_hdr_bits {
+ T3_EOP = 1,
+ T3_SOP = 2,
+ T3_SOPEOP = T3_EOP|T3_SOP,
+};
+
static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
enum t3_wr_flags flags, u8 genbit, u32 tid,
- u8 len)
+ u8 len, u8 sopeop)
{
wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
- V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) |
+ V_FW_RIWR_SOPEOP(sopeop) |
V_FW_RIWR_FLAGS(flags));
wmb();
wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
@@ -404,6 +456,7 @@ enum tpt_addr_type {
};
enum tpt_mem_perm {
+ TPT_MW_BIND = 0x10,
TPT_LOCAL_READ = 0x8,
TPT_LOCAL_WRITE = 0x4,
TPT_REMOTE_READ = 0x2,
@@ -615,6 +668,11 @@ struct t3_swsq {
int signaled;
};
+struct t3_swrq {
+ __u64 wr_id;
+ __u32 pbl_addr;
+};
+
/*
* A T3 WQ implements both the SQ and RQ.
*/
@@ -631,14 +689,15 @@ struct t3_wq {
u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
u32 sq_rptr; /* pending wrs */
u32 sq_size_log2; /* sq size */
- u64 *rq; /* SW RQ (holds consumer wr_ids */
+ struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */
u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
u32 rq_rptr; /* pending wrs */
- u64 *rq_oldest_wr; /* oldest wr on the SW RQ */
+ struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
u32 rq_size_log2; /* rq size */
u32 rq_addr; /* rq adapter address */
void __iomem *doorbell; /* kernel db */
u64 udb; /* user db if any */
+ struct cxio_rdev *rdev;
};
struct t3_cq {
@@ -659,7 +718,7 @@ struct t3_cq {
static inline void cxio_set_wq_in_error(struct t3_wq *wq)
{
- wq->queue->flit[13] = 1;
+ wq->queue->wq_in_err.err = 1;
}
static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 71554eacb13..4489c89d671 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -71,18 +71,16 @@ static void rnic_init(struct iwch_dev *rnicp)
idr_init(&rnicp->mmidr);
spin_lock_init(&rnicp->lock);
- rnicp->attr.vendor_id = 0x168;
- rnicp->attr.vendor_part_id = 7;
rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
- rnicp->attr.max_wrs = (1UL << 24) - 1;
+ rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
- rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
+ rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
- rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
+ rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
rnicp->attr.can_resize_wq = 0;
rnicp->attr.max_rdma_reads_per_qp = 8;
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index d2409a505e8..3773453b2cf 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -48,8 +48,6 @@ struct iwch_qp;
struct iwch_mr;
struct iwch_rnic_attributes {
- u32 vendor_id;
- u32 vendor_part_id;
u32 max_qps;
u32 max_wrs; /* Max for any SQ/RQ */
u32 max_sge_per_wr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index d44a6df9ad8..c325c44807e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -67,10 +67,10 @@ int peer2peer = 0;
module_param(peer2peer, int, 0644);
MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
-static int ep_timeout_secs = 10;
+static int ep_timeout_secs = 60;
module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
- "in seconds (default=10)");
+ "in seconds (default=60)");
static int mpa_rev = 1;
module_param(mpa_rev, int, 0644);
@@ -1650,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
release = 1;
break;
case ABORTING:
- break;
case DEAD:
+ break;
default:
BUG_ON(1);
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 4ee8ccd0a9e..cf5474ae68f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -81,6 +81,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
wc->wr_id = cookie;
wc->qp = &qhp->ibqp;
wc->vendor_err = CQE_STATUS(cqe);
+ wc->wc_flags = 0;
PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
"lo 0x%x cookie 0x%llx\n", __func__,
@@ -94,6 +95,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
else
wc->byte_len = 0;
wc->opcode = IB_WC_RECV;
+ if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
+ CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
+ wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
} else {
switch (CQE_OPCODE(cqe)) {
case T3_RDMA_WRITE:
@@ -105,17 +111,20 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
break;
case T3_SEND:
case T3_SEND_WITH_SE:
+ case T3_SEND_WITH_INV:
+ case T3_SEND_WITH_SE_INV:
wc->opcode = IB_WC_SEND;
break;
case T3_BIND_MW:
wc->opcode = IB_WC_BIND_MW;
break;
- /* these aren't supported yet */
- case T3_SEND_WITH_INV:
- case T3_SEND_WITH_SE_INV:
case T3_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
case T3_FAST_REGISTER:
+ wc->opcode = IB_WC_FAST_REG_MR;
+ break;
default:
printk(KERN_ERR MOD "Unexpected opcode %d "
"in the CQE received for QPID=0x%0x\n",
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 58c3d61bcd1..ec49a5cbdeb 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -35,17 +35,26 @@
#include <rdma/ib_verbs.h>
#include "cxio_hal.h"
+#include "cxio_resource.h"
#include "iwch.h"
#include "iwch_provider.h"
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- __be64 *page_list)
+static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
{
- u32 stag;
u32 mmid;
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+}
+
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp, int shift)
+{
+ u32 stag;
if (cxio_register_phys_mem(&rhp->rdev,
&stag, mhp->attr.pdid,
@@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
- shift-12,
- page_list,
- &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+
+ iwch_finish_mem_reg(mhp, stag);
+
return 0;
}
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
- __be64 *page_list,
int npages)
{
u32 stag;
- u32 mmid;
-
/* We could support this... */
if (npages > mhp->attr.pbl_size)
@@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
- shift-12,
- page_list,
- &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+
+ iwch_finish_mem_reg(mhp, stag);
+
+ return 0;
+}
+
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
+{
+ mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
+ npages << 3);
+
+ if (!mhp->attr.pbl_addr)
+ return -ENOMEM;
+
+ mhp->attr.pbl_size = npages;
+
return 0;
}
+void iwch_free_pbl(struct iwch_mr *mhp)
+{
+ cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+}
+
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
+{
+ return cxio_write_pbl(&mhp->rhp->rdev, pages,
+ mhp->attr.pbl_addr + (offset << 3), npages);
+}
+
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index d07d3a377b5..b89640aa6e1 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -56,6 +56,7 @@
#include "iwch_provider.h"
#include "iwch_cm.h"
#include "iwch_user.h"
+#include "common.h"
static int iwch_modify_port(struct ib_device *ibdev,
u8 port, int port_modify_mask,
@@ -442,6 +443,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
+ iwch_free_pbl(mhp);
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
@@ -475,6 +477,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->rhp = rhp;
+
/* First check that we have enough alignment */
if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
ret = -EINVAL;
@@ -492,7 +496,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (ret)
goto err;
- mhp->rhp = rhp;
+ ret = iwch_alloc_pbl(mhp, npages);
+ if (ret) {
+ kfree(page_list);
+ goto err_pbl;
+ }
+
+ ret = iwch_write_pbl(mhp, page_list, npages, 0);
+ kfree(page_list);
+ if (ret)
+ goto err_pbl;
+
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
@@ -502,12 +516,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.len = (u32) total_size;
mhp->attr.pbl_size = npages;
- ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
- kfree(page_list);
- if (ret) {
- goto err;
- }
+ ret = iwch_register_mem(rhp, php, mhp, shift);
+ if (ret)
+ goto err_pbl;
+
return &mhp->ibmr;
+
+err_pbl:
+ iwch_free_pbl(mhp);
+
err:
kfree(mhp);
return ERR_PTR(ret);
@@ -560,7 +577,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
return ret;
}
- ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+ ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
kfree(page_list);
if (ret) {
return ret;
@@ -602,6 +619,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->rhp = rhp;
+
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
@@ -615,10 +634,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
- pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+ err = iwch_alloc_pbl(mhp, n);
+ if (err)
+ goto err;
+
+ pages = (__be64 *) __get_free_page(GFP_KERNEL);
if (!pages) {
err = -ENOMEM;
- goto err;
+ goto err_pbl;
}
i = n = 0;
@@ -630,25 +653,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = iwch_write_pbl(mhp, pages, i, n);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
}
}
- mhp->rhp = rhp;
+ if (i)
+ err = iwch_write_pbl(mhp, pages, i, n);
+
+pbl_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_pbl;
+
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) length;
- mhp->attr.pbl_size = i;
- err = iwch_register_mem(rhp, php, mhp, shift, pages);
- kfree(pages);
+
+ err = iwch_register_mem(rhp, php, mhp, shift);
if (err)
- goto err;
+ goto err_pbl;
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3;
+ rhp->rdev.rnic_info.pbl_base) >> 3;
PDBG("%s user resp pbl_addr 0x%x\n", __func__,
uresp.pbl_addr);
@@ -661,6 +697,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mhp->ibmr;
+err_pbl:
+ iwch_free_pbl(mhp);
+
err:
ib_umem_release(mhp->umem);
kfree(mhp);
@@ -709,6 +748,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
mhp->attr.type = TPT_MW;
mhp->attr.stag = stag;
mmid = (stag) >> 8;
+ mhp->ibmw.rkey = stag;
insert_handle(rhp, &rhp->mmidr, mhp, mmid);
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
@@ -730,6 +770,68 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
return 0;
}
+/*
+ * Allocate a fast-register memory region with room for pbl_depth PBL
+ * entries in protection domain pd.
+ *
+ * A PBL of pbl_depth entries and an STag are allocated, the MR attributes
+ * are filled in, and the MR is inserted into the device's mmid table
+ * under (stag >> 8).
+ *
+ * Returns the embedded ib_mr on success, or an ERR_PTR() carrying -ENOMEM
+ * or the error reported by iwch_alloc_pbl()/cxio_allocate_stag().
+ */
+static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
+{
+ struct iwch_pd *php = to_iwch_pd(pd);
+ struct iwch_dev *rhp = php->rhp;
+ struct iwch_mr *mhp;
+ u32 stag = 0;
+ u32 mmid;
+ int ret;
+
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ /* iwch_alloc_pbl() reaches the device through mhp->rhp. */
+ mhp->rhp = rhp;
+
+ ret = iwch_alloc_pbl(mhp, pbl_depth);
+ if (ret)
+ goto err_free_mhp;
+
+ mhp->attr.pbl_size = pbl_depth;
+
+ ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ if (ret)
+ goto err_free_pbl;
+
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = TPT_NON_SHARED_MR;
+ mhp->attr.stag = stag;
+ mhp->attr.state = 1;
+
+ /* The same STag serves as both local and remote key. */
+ mhp->ibmr.lkey = stag;
+ mhp->ibmr.rkey = stag;
+
+ mmid = stag >> 8;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+
+ return &mhp->ibmr;
+
+err_free_pbl:
+ iwch_free_pbl(mhp);
+err_free_mhp:
+ kfree(mhp);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Allocate a fast-register page list with space for page_list_len u64
+ * entries. Header and entry array come from a single kmalloc(); the
+ * array starts immediately after the header.
+ *
+ * Returns the new list, or ERR_PTR(-ENOMEM) if the allocation fails.
+ * The device argument is not used.
+ */
+static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
+ struct ib_device *device,
+ int page_list_len)
+{
+ struct ib_fast_reg_page_list *frpl;
+
+ frpl = kmalloc(sizeof(*frpl) + page_list_len * sizeof(u64), GFP_KERNEL);
+ if (frpl) {
+ /* Entry storage lives directly behind the header. */
+ frpl->page_list = (u64 *)&frpl[1];
+ frpl->max_page_list_len = page_list_len;
+ return frpl;
+ }
+
+ return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Release a fast-register page list. Header and entries were obtained
+ * with one kmalloc() in iwch_alloc_fastreg_pbl(), so one kfree() suffices.
+ */
+static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list)
+{
+ kfree(page_list);
+}
+
static int iwch_destroy_qp(struct ib_qp *ib_qp)
{
struct iwch_dev *rhp;
@@ -805,6 +907,15 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
*/
sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
wqsize = roundup_pow_of_two(rqsize + sqsize);
+
+ /*
+ * Kernel users need more wq space for fastreg WRs which can take
+ * 2 WR fragments.
+ */
+ ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+ if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
+ wqsize = roundup_pow_of_two(rqsize +
+ roundup_pow_of_two(attrs->cap.max_send_wr * 2));
PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
wqsize, sqsize, rqsize);
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
@@ -813,7 +924,6 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
qhp->wq.size_log2 = ilog2(wqsize);
qhp->wq.rq_size_log2 = ilog2(rqsize);
qhp->wq.sq_size_log2 = ilog2(sqsize);
- ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
kfree(qhp);
@@ -897,10 +1007,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
qhp->ibqp.qp_num = qhp->wq.qpid;
init_timer(&(qhp->timer));
PDBG("%s sq_num_entries %d, rq_num_entries %d "
- "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n",
+ "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
__func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
- 1 << qhp->wq.size_log2);
+ 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
return &qhp->ibqp;
}
@@ -985,6 +1095,29 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
return 0;
}
+/*
+ * Pack the firmware version reported by the low-level net device into a
+ * u64: major in bits 32-47, minor in bits 16-31, micro in bits 0-15.
+ *
+ * The version is read from ethtool drvinfo (under the RTNL lock), its
+ * first character is skipped, and the remainder is parsed as
+ * "<maj>.<min>.<mic>". Any component that is missing or does not parse
+ * is reported as 0 instead of being read uninitialized.
+ */
+static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
+{
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+ char *cp, *next;
+ unsigned fw_maj = 0, fw_min = 0, fw_mic = 0;
+
+ /* Zero the buffer so an unfilled version string parses as empty. */
+ memset(&info, 0, sizeof info);
+
+ rtnl_lock();
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ rtnl_unlock();
+
+ /* Make sure strsep() cannot run off the end of the array. */
+ info.fw_version[sizeof(info.fw_version) - 1] = '\0';
+
+ /* The leading character of fw_version is not part of the number. */
+ next = info.fw_version + 1;
+
+ /* strsep() returns NULL once the string is exhausted. */
+ cp = strsep(&next, ".");
+ if (cp)
+ sscanf(cp, "%i", &fw_maj);
+ cp = strsep(&next, ".");
+ if (cp)
+ sscanf(cp, "%i", &fw_min);
+ cp = strsep(&next, ".");
+ if (cp)
+ sscanf(cp, "%i", &fw_mic);
+
+ return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
+ (fw_mic & 0xffff);
+}
+
static int iwch_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
@@ -995,7 +1128,10 @@ static int iwch_query_device(struct ib_device *ibdev,
dev = to_iwch_dev(ibdev);
memset(props, 0, sizeof *props);
memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+ props->hw_ver = dev->rdev.t3cdev_p->type;
+ props->fw_ver = fw_vers_string_to_u64(dev);
props->device_cap_flags = dev->device_cap_flags;
+ props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
props->max_mr_size = dev->attr.max_mr_size;
@@ -1010,6 +1146,7 @@ static int iwch_query_device(struct ib_device *ibdev,
props->max_mr = dev->attr.max_mem_regs;
props->max_pd = dev->attr.max_pds;
props->local_ca_ack_delay = 0;
+ props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
return 0;
}
@@ -1050,6 +1187,28 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
+/*
+ * Report whether the adapter firmware is new enough for fast-register
+ * work requests: non-zero when the version is greater than 6.0 (major
+ * above 6, or major 6 with a non-zero minor).
+ *
+ * The version is read from ethtool drvinfo (under the RTNL lock), its
+ * first character is skipped, and the remainder is parsed as
+ * "<maj>.<min>...". A version that cannot be parsed is treated as 0.0,
+ * i.e. fast-register is reported as unsupported.
+ */
+static int fw_supports_fastreg(struct iwch_dev *iwch_dev)
+{
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+ char *cp, *next;
+ unsigned fw_maj = 0, fw_min = 0;
+
+ /* Zero the buffer so an unfilled version string parses as empty. */
+ memset(&info, 0, sizeof info);
+
+ rtnl_lock();
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ rtnl_unlock();
+
+ /* Make sure strsep() cannot run off the end of the array. */
+ info.fw_version[sizeof(info.fw_version) - 1] = '\0';
+
+ /* The leading character of fw_version is not part of the number. */
+ next = info.fw_version+1;
+
+ /* strsep() returns NULL once the string is exhausted. */
+ cp = strsep(&next, ".");
+ if (cp)
+ sscanf(cp, "%i", &fw_maj);
+ cp = strsep(&next, ".");
+ if (cp)
+ sscanf(cp, "%i", &fw_min);
+
+ PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min);
+
+ return fw_maj > 6 || (fw_maj == 6 && fw_min > 0);
+}
+
static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
@@ -1058,7 +1217,9 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, ch
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
PDBG("%s dev 0x%p\n", __func__, dev);
+ rtnl_lock();
lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ rtnl_unlock();
return sprintf(buf, "%s\n", info.fw_version);
}
@@ -1071,7 +1232,9 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
PDBG("%s dev 0x%p\n", __func__, dev);
+ rtnl_lock();
lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ rtnl_unlock();
return sprintf(buf, "%s\n", info.driver);
}
@@ -1085,6 +1248,61 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
iwch_dev->rdev.rnic_info.pdev->device);
}
+/* Join the <stem>_hi and <stem>_lo halves of a split MIB counter. */
+#define IWCH_MIB64(mib, stem) \
+ (((u64) (mib).stem##_hi << 32) + (mib).stem##_lo)
+
+/*
+ * Fill *stats with the adapter's IP/TCP MIB counters.
+ *
+ * The counters are fetched from the low-level driver with the
+ * RDMA_GET_MIB control call. Returns 0 on success, or -ENOSYS if that
+ * call fails, in which case *stats is left untouched.
+ */
+static int iwch_get_mib(struct ib_device *ibdev,
+ union rdma_protocol_stats *stats)
+{
+ struct iwch_dev *dev;
+ struct tp_mib_stats mib;
+
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ dev = to_iwch_dev(ibdev);
+ if (dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &mib))
+ return -ENOSYS;
+
+ memset(stats, 0, sizeof *stats);
+
+ /* IP counters kept as hi/lo pairs */
+ stats->iw.ipInReceives = IWCH_MIB64(mib, ipInReceive);
+ stats->iw.ipInHdrErrors = IWCH_MIB64(mib, ipInHdrErrors);
+ stats->iw.ipInAddrErrors = IWCH_MIB64(mib, ipInAddrErrors);
+ stats->iw.ipInUnknownProtos = IWCH_MIB64(mib, ipInUnknownProtos);
+ stats->iw.ipInDiscards = IWCH_MIB64(mib, ipInDiscards);
+ stats->iw.ipInDelivers = IWCH_MIB64(mib, ipInDelivers);
+ stats->iw.ipOutRequests = IWCH_MIB64(mib, ipOutRequests);
+ stats->iw.ipOutDiscards = IWCH_MIB64(mib, ipOutDiscards);
+ stats->iw.ipOutNoRoutes = IWCH_MIB64(mib, ipOutNoRoutes);
+
+ /* IP reassembly counters kept as single fields */
+ stats->iw.ipReasmTimeout = (u64) mib.ipReasmTimeout;
+ stats->iw.ipReasmReqds = (u64) mib.ipReasmReqds;
+ stats->iw.ipReasmOKs = (u64) mib.ipReasmOKs;
+ stats->iw.ipReasmFails = (u64) mib.ipReasmFails;
+
+ /* TCP counters kept as single fields */
+ stats->iw.tcpActiveOpens = (u64) mib.tcpActiveOpens;
+ stats->iw.tcpPassiveOpens = (u64) mib.tcpPassiveOpens;
+ stats->iw.tcpAttemptFails = (u64) mib.tcpAttemptFails;
+ stats->iw.tcpEstabResets = (u64) mib.tcpEstabResets;
+ stats->iw.tcpOutRsts = (u64) mib.tcpOutRsts;
+ stats->iw.tcpCurrEstab = (u64) mib.tcpCurrEstab;
+
+ /* TCP counters kept as hi/lo pairs */
+ stats->iw.tcpInSegs = IWCH_MIB64(mib, tcpInSegs);
+ stats->iw.tcpOutSegs = IWCH_MIB64(mib, tcpOutSegs);
+ stats->iw.tcpRetransSegs = IWCH_MIB64(mib, tcpRetransSeg);
+ stats->iw.tcpInErrs = IWCH_MIB64(mib, tcpInErrs);
+
+ stats->iw.tcpRtoMin = (u64) mib.tcpRtoMin;
+ stats->iw.tcpRtoMax = (u64) mib.tcpRtoMax;
+ return 0;
+}
+
+#undef IWCH_MIB64
+
+
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
@@ -1094,7 +1312,7 @@ static struct device_attribute *iwch_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
- &dev_attr_board_id
+ &dev_attr_board_id,
};
int iwch_register_device(struct iwch_dev *dev)
@@ -1107,8 +1325,12 @@ int iwch_register_device(struct iwch_dev *dev)
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
- dev->device_cap_flags =
- (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW);
+ dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+
+ /* cxgb3 supports STag 0. */
+ dev->ibdev.local_dma_lkey = 0;
+ if (fw_supports_fastreg(dev))
+ dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1160,15 +1382,16 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.alloc_mw = iwch_alloc_mw;
dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
-
+ dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr;
+ dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
+ dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
dev->ibdev.attach_mcast = iwch_multicast_attach;
dev->ibdev.detach_mcast = iwch_multicast_detach;
dev->ibdev.process_mad = iwch_process_mad;
-
dev->ibdev.req_notify_cq = iwch_arm_cq;
dev->ibdev.post_send = iwch_post_send;
dev->ibdev.post_recv = iwch_post_receive;
-
+ dev->ibdev.get_protocol_stats = iwch_get_mib;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index db5100d27ca..f5ceca05c43 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -296,14 +296,6 @@ static inline u32 iwch_ib_to_tpt_access(int acc)
TPT_LOCAL_READ;
}
-static inline u32 iwch_ib_to_mwbind_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) |
- T3_MEM_ACCESS_LOCAL_READ;
-}
-
enum iwch_mmid_state {
IWCH_STAG_STATE_VALID,
IWCH_STAG_STATE_INVALID
@@ -340,14 +332,14 @@ int iwch_quiesce_qps(struct iwch_cq *chp);
int iwch_resume_qps(struct iwch_cq *chp);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- __be64 *page_list);
+ struct iwch_mr *mhp, int shift);
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
- __be64 *page_list,
int npages);
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
+void iwch_free_pbl(struct iwch_mr *mhp);
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 9b4be889c58..9a3be3a9d5d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -33,10 +33,11 @@
#include "iwch.h"
#include "iwch_cm.h"
#include "cxio_hal.h"
+#include "cxio_resource.h"
#define NO_SUPPORT -1
-static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
u8 * flit_cnt)
{
int i;
@@ -44,59 +45,44 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_SEND:
- case IB_WR_SEND_WITH_IMM:
if (wr->send_flags & IB_SEND_SOLICITED)
wqe->send.rdmaop = T3_SEND_WITH_SE;
else
wqe->send.rdmaop = T3_SEND;
wqe->send.rem_stag = 0;
break;
-#if 0 /* Not currently supported */
- case TYPE_SEND_INVALIDATE:
- case TYPE_SEND_INVALIDATE_IMMEDIATE:
- wqe->send.rdmaop = T3_SEND_WITH_INV;
- wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
- break;
- case TYPE_SEND_SE_INVALIDATE:
- wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
- wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+ case IB_WR_SEND_WITH_INV:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+ else
+ wqe->send.rdmaop = T3_SEND_WITH_INV;
+ wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
break;
-#endif
default:
- break;
+ return -EINVAL;
}
if (wr->num_sge > T3_MAX_SGE)
return -EINVAL;
wqe->send.reserved[0] = 0;
wqe->send.reserved[1] = 0;
wqe->send.reserved[2] = 0;
- if (wr->opcode == IB_WR_SEND_WITH_IMM) {
- plen = 4;
- wqe->send.sgl[0].stag = wr->ex.imm_data;
- wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
- wqe->send.num_sgle = __constant_cpu_to_be32(0);
- *flit_cnt = 5;
- } else {
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen) {
- return -EMSGSIZE;
- }
- plen += wr->sg_list[i].length;
- wqe->send.sgl[i].stag =
- cpu_to_be32(wr->sg_list[i].lkey);
- wqe->send.sgl[i].len =
- cpu_to_be32(wr->sg_list[i].length);
- wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
- }
- wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
- *flit_cnt = 4 + ((wr->num_sge) << 1);
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen)
+ return -EMSGSIZE;
+
+ plen += wr->sg_list[i].length;
+ wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
+ wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+ wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
}
+ wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
+ *flit_cnt = 4 + ((wr->num_sge) << 1);
wqe->send.plen = cpu_to_be32(plen);
return 0;
}
-static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
u8 *flit_cnt)
{
int i;
@@ -137,15 +123,18 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
u8 *flit_cnt)
{
if (wr->num_sge > 1)
return -EINVAL;
wqe->read.rdmaop = T3_READ_REQ;
+ if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+ wqe->read.local_inv = 1;
+ else
+ wqe->read.local_inv = 0;
wqe->read.reserved[0] = 0;
wqe->read.reserved[1] = 0;
- wqe->read.reserved[2] = 0;
wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
@@ -155,6 +144,57 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
+/*
+ * Build a fast-register work request from wr into wqe.
+ *
+ * @wqe: first WR slot to fill
+ * @wr: send WR carrying the fast_reg parameters and page list
+ * @flit_cnt: out, 5 + page_list_len, capped at 15
+ * @wr_cnt: out, number of WR slots consumed (1 or 2)
+ * @wq: work queue; used to locate the slot after wptr when the page
+ * list spills into a second WR
+ *
+ * Returns 0, or -EINVAL if page_list_len exceeds T3_MAX_FASTREG_DEPTH.
+ */
+static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+{
+ int i;
+ __be64 *p;
+
+ if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
+ return -EINVAL;
+ *wr_cnt = 1;
+ wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
+ wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
+ /* The 64-bit iova_start is split into two big-endian 32-bit words. */
+ wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+ wqe->fastreg.va_base_lo_fbo =
+ cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
+ /* Page size is encoded as page_shift - 12. */
+ wqe->fastreg.page_type_perms = cpu_to_be32(
+ V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
+ V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
+ V_FR_TYPE(TPT_VATO) |
+ V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
+ p = &wqe->fastreg.pbl_addrs[0];
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {
+
+ /* If we need a 2nd WR, then set it up */
+ if (i == T3_MAX_FASTREG_FRAG) {
+ *wr_cnt = 2;
+ /* The continuation WR occupies the slot right after wptr. */
+ wqe = (union t3_wr *)(wq->queue +
+ Q_PTR2IDX((wq->wptr+1), wq->size_log2));
+ build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
+ Q_GENBIT(wq->wptr + 1, wq->size_log2),
+ 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
+ T3_EOP);
+
+ /* Remaining page addresses go into the fragment WR. */
+ p = &wqe->pbl_frag.pbl_addrs[0];
+ }
+ *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
+ }
+ /* Flit count reported for the first WR, clamped to 15. */
+ *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
+ if (*flit_cnt > 15)
+ *flit_cnt = 15;
+ return 0;
+}
+
+/*
+ * Build a local-invalidate work request: stores wr->ex.invalidate_rkey
+ * (big-endian) as the STag, clears the reserved field, and sets *flit_cnt
+ * to the size of struct t3_local_inv_wr in 8-byte units. Always returns 0.
+ */
+static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
+ wqe->local_inv.reserved = 0;
+ *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
+ return 0;
+}
+
/*
* TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
*/
@@ -205,23 +245,106 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
return 0;
}
-static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
+/*
+ * Build a receive work request for SGEs that carry real (non-zero) lkeys.
+ *
+ * Each SGE is mapped to a PBL address and page size through
+ * iwch_sgl2pbl_map(); unused SGE slots up to T3_MAX_SGE are zeroed. The
+ * WR id is recorded in the software RQ entry at rq_wptr, with pbl_addr 0.
+ *
+ * Returns 0, or the error returned by iwch_sgl2pbl_map().
+ *
+ * NOTE(review): the num_sge > T3_MAX_SGE check is removed from this
+ * function, so the caller is expected to have done it. page_size[1..3]
+ * are copied into the WQE unconditionally; confirm iwch_sgl2pbl_map()
+ * initializes all T3_MAX_SGE entries rather than only num_sge of them.
+ */
+static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
struct ib_recv_wr *wr)
{
- int i;
- if (wr->num_sge > T3_MAX_SGE)
- return -EINVAL;
+ int i, err = 0;
+ u32 pbl_addr[T3_MAX_SGE];
+ u8 page_size[T3_MAX_SGE];
+
+ err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
+ page_size);
+ if (err)
+ return err;
+ wqe->recv.pagesz[0] = page_size[0];
+ wqe->recv.pagesz[1] = page_size[1];
+ wqe->recv.pagesz[2] = page_size[2];
+ wqe->recv.pagesz[3] = page_size[3];
wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
for (i = 0; i < wr->num_sge; i++) {
wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+
+ /* "to" in the WQE is the offset into the page */
+ wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
+ (1UL << (12 + page_size[i])));
+
+ /* pbl_addr is the adapter's address in the PBL */
+ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
+ }
+ /* Clear the SGE slots this WR does not use. */
+ for (; i < T3_MAX_SGE; i++) {
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = 0;
+ wqe->recv.sgl[i].to = 0;
+ wqe->recv.pbl_addr[i] = 0;
+ }
+ /* No PBL was allocated for this WR, hence pbl_addr 0 in the RQ entry. */
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].pbl_addr = 0;
+ return 0;
+}
+
+/*
+ * Build a receive work request whose SGEs all use STag 0.
+ *
+ * Every SGE must have lkey 0 and a length of at most
+ * T3_STAG0_MAX_PBE_LEN. T3_STAG0_PBL_SIZE bytes of PBL pool memory are
+ * allocated for the WR and two PBL entries are assigned to each SGE. The
+ * WR id and the PBL address are recorded in the software RQ entry at
+ * rq_wptr.
+ *
+ * Returns 0 on success, -EINVAL for an SGE that breaks the rules above
+ * (nothing is allocated and the WQE is not touched in that case), or
+ * -ENOMEM if the PBL pool is exhausted.
+ */
+static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
+ struct ib_recv_wr *wr)
+{
+ int i;
+ u32 pbl_addr;
+ u32 pbl_offset;
+
+ /*
+ * Validate all SGEs before allocating the PBL, so that a rejected
+ * WR cannot leak PBL pool memory.
+ */
+ for (i = 0; i < wr->num_sge; i++) {
+
+ /*
+ * Use a 128MB page size. This and an imposed 128MB
+ * sge length limit allows us to require only a 2-entry HW
+ * PBL for each SGE. This restriction is acceptable since
+ * it is not possible to allocate 128MB of contiguous
+ * DMA coherent memory!
+ */
+ if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
+ return -EINVAL;
+
+ /*
+ * T3 restricts a recv to all zero-stag or all non-zero-stag.
+ */
+ if (wr->sg_list[i].lkey != 0)
+ return -EINVAL;
+ }
+
+ /*
+ * The T3 HW requires the PBL in the HW recv descriptor to reference
+ * a PBL entry. So we allocate the max needed PBL memory here and pass
+ * it to the uP in the recv WR. The uP will build the PBL and setup
+ * the HW recv descriptor.
+ */
+ pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
+ if (!pbl_addr)
+ return -ENOMEM;
+
+ /*
+ * Compute the 8B aligned offset.
+ */
+ pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
+
+ wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
+ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
+ /* Each SGE owns two consecutive PBL entries. */
+ pbl_offset += 2;
}
for (; i < T3_MAX_SGE; i++) {
+ wqe->recv.pagesz[i] = 0;
wqe->recv.sgl[i].stag = 0;
wqe->recv.sgl[i].len = 0;
wqe->recv.sgl[i].to = 0;
+ wqe->recv.pbl_addr[i] = 0;
}
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
return 0;
}
@@ -229,7 +352,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
int err = 0;
- u8 t3_wr_flit_cnt;
+ u8 uninitialized_var(t3_wr_flit_cnt);
enum t3_wr_opcode t3_wr_opcode = 0;
enum t3_wr_flags t3_wr_flags;
struct iwch_qp *qhp;
@@ -238,6 +361,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
u32 num_wrs;
unsigned long flag;
struct t3_swsq *sqp;
+ int wr_cnt = 1;
qhp = to_iwch_qp(ibqp);
spin_lock_irqsave(&qhp->lock, flag);
@@ -262,33 +386,45 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
t3_wr_flags = 0;
if (wr->send_flags & IB_SEND_SOLICITED)
t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
- if (wr->send_flags & IB_SEND_FENCE)
- t3_wr_flags |= T3_READ_FENCE_FLAG;
if (wr->send_flags & IB_SEND_SIGNALED)
t3_wr_flags |= T3_COMPLETION_FLAG;
sqp = qhp->wq.sq +
Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
switch (wr->opcode) {
case IB_WR_SEND:
- case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
+ if (wr->send_flags & IB_SEND_FENCE)
+ t3_wr_flags |= T3_READ_FENCE_FLAG;
t3_wr_opcode = T3_WR_SEND;
- err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+ err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
t3_wr_opcode = T3_WR_WRITE;
- err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+ err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
break;
case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
t3_wr_opcode = T3_WR_READ;
t3_wr_flags = 0; /* T3 reads are always signaled */
- err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+ err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
if (err)
break;
sqp->read_len = wqe->read.local_len;
if (!qhp->wq.oldest_read)
qhp->wq.oldest_read = sqp;
break;
+ case IB_WR_FAST_REG_MR:
+ t3_wr_opcode = T3_WR_FASTREG;
+ err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
+ &wr_cnt, &qhp->wq);
+ break;
+ case IB_WR_LOCAL_INV:
+ if (wr->send_flags & IB_SEND_FENCE)
+ t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
+ t3_wr_opcode = T3_WR_INV_STAG;
+ err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
+ break;
default:
PDBG("%s post of type=%d TBD!\n", __func__,
wr->opcode);
@@ -307,14 +443,15 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, t3_wr_flit_cnt);
+ 0, t3_wr_flit_cnt,
+ (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
__func__, (unsigned long long) wr->wr_id, idx,
Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
sqp->opcode);
wr = wr->next;
num_wrs--;
- ++(qhp->wq.wptr);
+ qhp->wq.wptr += wr_cnt;
++(qhp->wq.sq_wptr);
}
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -345,21 +482,27 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return -EINVAL;
}
while (wr) {
+ if (wr->num_sge > T3_MAX_SGE) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
wqe = (union t3_wr *) (qhp->wq.queue + idx);
if (num_wrs)
- err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
+ if (wr->sg_list[0].lkey)
+ err = build_rdma_recv(qhp, wqe, wr);
+ else
+ err = build_zero_stag_recv(qhp, wqe, wr);
else
err = -ENOMEM;
if (err) {
*bad_wr = wr;
break;
}
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
- wr->wr_id;
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, sizeof(struct t3_receive_wr) >> 3);
+ 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
"wqe %p \n", __func__, (unsigned long long) wr->wr_id,
idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
@@ -419,10 +562,10 @@ int iwch_bind_mw(struct ib_qp *qp,
sgl.lkey = mw_bind->mr->lkey;
sgl.length = mw_bind->length;
wqe->bind.reserved = 0;
- wqe->bind.type = T3_VA_BASED_TO;
+ wqe->bind.type = TPT_VATO;
/* TBD: check perms */
- wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
+ wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags);
wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
@@ -430,7 +573,7 @@ int iwch_bind_mw(struct ib_qp *qp,
err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
if (err) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return err;
+ return err;
}
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
@@ -441,10 +584,9 @@ int iwch_bind_mw(struct ib_qp *qp,
sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
wqe->bind.mr_pagesz = page_size;
- wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
- sizeof(struct t3_bind_mw_wr) >> 3);
+ sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
++(qhp->wq.wptr);
++(qhp->wq.sq_wptr);
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -655,6 +797,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
{
struct iwch_cq *rchp, *schp;
int count;
+ int flushed;
rchp = get_chp(qhp->rhp, qhp->attr.rcq);
schp = get_chp(qhp->rhp, qhp->attr.scq);
@@ -669,20 +812,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&rchp->cq);
cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
- cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&rchp->lock, *flag);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ if (flushed)
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
/* locking heirarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&schp->lock, *flag);
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&schp->cq);
cxio_count_scqes(&schp->cq, &qhp->wq, &count);
- cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&schp->lock, *flag);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+ if (flushed)
+ (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
/* deref */
if (atomic_dec_and_test(&qhp->refcnt))
@@ -755,7 +900,8 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
init_attr.rqe_count = iwch_rqes_posted(qhp);
init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
- init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
+ if (!qhp->ibqp.uobject)
+ init_attr.flags |= PRIV_QP;
if (peer2peer) {
init_attr.rtr_type = RTR_READ;
if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
@@ -880,7 +1026,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
ep = qhp->ep;
get_ep(&ep->com);
}
- flush_qp(qhp, &flag);
break;
case IWCH_QP_STATE_TERMINATE:
qhp->attr.state = IWCH_QP_STATE_TERMINATE;
@@ -911,6 +1056,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
}
switch (attrs->next_state) {
case IWCH_QP_STATE_IDLE:
+ flush_qp(qhp, &flag);
qhp->attr.state = IWCH_QP_STATE_IDLE;
qhp->attr.llp_stream_handle = NULL;
put_ep(&qhp->ep->com);
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 00bab60f6de..0b0618edd64 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -192,6 +192,9 @@ struct ehca_qp {
int mtu_shift;
u32 message_count;
u32 packet_count;
+ atomic_t nr_events; /* events seen */
+ wait_queue_head_t wait_completion;
+ int mig_armed;
};
#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 2515cbde7e6..46288220cfb 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
props->max_ee = limit_uint(rblock->max_rd_ee_context);
props->max_rdd = limit_uint(rblock->max_rd_domain);
props->max_fmr = limit_uint(rblock->max_mr);
- props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay);
props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp);
props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context);
props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
@@ -115,7 +114,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
}
props->max_pkeys = 16;
- props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay);
+ /* Some FW versions say 0 here; insert sensible value in that case */
+ props->local_ca_ack_delay = rblock->local_ca_ack_delay ?
+ min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
@@ -136,7 +137,7 @@ query_device1:
return ret;
}
-static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
{
switch (fw_mtu) {
case 0x1:
@@ -156,7 +157,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
}
}
-static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
+static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
{
switch (vl_cap) {
case 0x1:
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index ca5eb0cb628..cb55be04442 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -178,6 +178,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
{
struct ib_event event;
+ /* PATH_MIG without the QP ever having been armed is false alarm */
+ if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
+ return;
+
event.device = &shca->ib_device;
event.event = event_type;
@@ -204,6 +208,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, token);
+ if (qp)
+ atomic_inc(&qp->nr_events);
read_unlock(&ehca_qp_idr_lock);
if (!qp)
@@ -223,6 +229,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
if (fatal && qp->ext_type == EQPT_SRQBASE)
dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
+ if (atomic_dec_and_test(&qp->nr_events))
+ wake_up(&qp->wait_completion);
return;
}
@@ -527,7 +535,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
{
struct ehca_eq *eq = &shca->eq;
struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
- u64 eqe_value;
+ u64 eqe_value, ret;
unsigned long flags;
int eqe_cnt, i;
int eq_empty = 0;
@@ -579,8 +587,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
ehca_dbg(&shca->ib_device,
"No eqe found for irq event");
goto unlock_irq_spinlock;
- } else if (!is_irq)
+ } else if (!is_irq) {
+ ret = hipz_h_eoi(eq->ist);
+ if (ret != H_SUCCESS)
+ ehca_err(&shca->ib_device,
+ "bad return code EOI -rc = %ld\n", ret);
ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+ }
if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
/* enable irq for new packets */
@@ -637,8 +650,8 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
spin_lock_irqsave(&pool->last_cpu_lock, flags);
- cpu = next_cpu(pool->last_cpu, cpu_online_map);
- if (cpu == NR_CPUS)
+ cpu = next_cpu_nr(pool->last_cpu, cpu_online_map);
+ if (cpu >= nr_cpu_ids)
cpu = first_cpu(cpu_online_map);
pool->last_cpu = cpu;
spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 482103eb6ea..598844d2edc 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -923,6 +923,7 @@ static struct of_device_id ehca_device_table[] =
},
{},
};
+MODULE_DEVICE_TABLE(of, ehca_device_table);
static struct of_platform_driver ehca_driver = {
.name = "ehca",
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 18fba92fa7a..ea13efddf17 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -566,6 +566,8 @@ static struct ehca_qp *internal_create_qp(
return ERR_PTR(-ENOMEM);
}
+ atomic_set(&my_qp->nr_events, 0);
+ init_waitqueue_head(&my_qp->wait_completion);
spin_lock_init(&my_qp->spinlock_s);
spin_lock_init(&my_qp->spinlock_r);
my_qp->qp_type = qp_type;
@@ -1458,6 +1460,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit2;
}
mqpcb->path_migration_state = attr->path_mig_state + 1;
+ if (attr->path_mig_state == IB_MIG_REARM)
+ my_qp->mig_armed = 1;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
}
@@ -1934,6 +1938,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
idr_remove(&ehca_qp_idr, my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ /* now wait until all pending events have completed */
+ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
+
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index bbe0436f4f7..dd9bc68f1c7 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -421,8 +421,10 @@ int ehca_post_send(struct ib_qp *qp,
int ret = 0;
unsigned long flags;
- if (unlikely(my_qp->state != IB_QPS_RTS)) {
- ehca_err(qp->device, "QP not in RTS state qpn=%x", qp->qp_num);
+ /* Reject WR if QP is in RESET, INIT or RTR state */
+ if (unlikely(my_qp->state < IB_QPS_RTS)) {
+ ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
+ my_qp->state, qp->qp_num);
return -EINVAL;
}
@@ -542,8 +544,16 @@ int ehca_post_recv(struct ib_qp *qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
- return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp),
- qp->device, recv_wr, bad_recv_wr);
+ struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+
+ /* Reject WR if QP is in RESET state */
+ if (unlikely(my_qp->state == IB_QPS_RESET)) {
+ ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
+ my_qp->state, qp->qp_num);
+ return -EINVAL;
+ }
+
+ return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
}
int ehca_post_srq_recv(struct ib_srq *srq,
@@ -679,7 +689,7 @@ poll_cq_one_read_cqe:
wc->dlid_path_bits = cqe->dlid;
wc->src_qp = cqe->remote_qp_number;
wc->wc_flags = cqe->w_completion_flags;
- wc->imm_data = cpu_to_be32(cqe->immediate_data);
+ wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
wc->sl = cqe->service_level;
poll_cq_one_exit0:
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 5245e13c3a3..415d3a465de 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -933,3 +933,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
r_cb,
0, 0, 0, 0);
}
+
+/*
+ * hipz_h_eoi - issue the H_EOI hypervisor call for an interrupt
+ * @irq: interrupt number, placed in the low bits of the hcall argument
+ *
+ * The argument handed to the hypervisor has 0xff in bits 24-31 OR'ed
+ * with @irq. iosync() is invoked before the hcall is made.
+ *
+ * Returns the status of plpar_hcall_norets() unchanged; the caller in
+ * ehca_process_eq() compares it against H_SUCCESS.
+ */
+u64 hipz_h_eoi(int irq)
+{
+ unsigned long xirr = (0xffULL << 24) | irq;
+
+ iosync();
+
+ return plpar_hcall_norets(H_EOI, xirr);
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 60ce02b7066..2c3c6e0ea5c 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
const u64 ressource_handle,
void *rblock,
unsigned long *byte_count);
+u64 hipz_h_eoi(int irq);
#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 661f8db6270..c3a32846543 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -163,6 +163,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
out:
ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+ mutex_unlock(&pd->lock);
return 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index a03bd28d9b4..d385e4168c9 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
wc->uqueue[head].opcode = entry->opcode;
wc->uqueue[head].vendor_err = entry->vendor_err;
wc->uqueue[head].byte_len = entry->byte_len;
- wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data;
+ wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
wc->uqueue[head].qp_num = entry->qp->qp_num;
wc->uqueue[head].src_qp = entry->src_qp;
wc->uqueue[head].wc_flags = entry->wc_flags;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index acf30c06a0c..daad09a4591 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd)
}
reloop:
- for (last = 0, i = 1; !last; i++) {
+ for (last = 0, i = 1; !last; i += !last) {
hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
eflags = ipath_hdrget_err_flags(rhf_addr);
etype = ipath_hdrget_rcv_type(rhf_addr);
@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
+/*
+ * used to force update of pioavailshadow if we can't get a pio buffer.
+ * Needed primarily due to exiting freeze mode after recovering
+ * from errors. Done lazily, because it's safer (known to not
+ * be writing pio buffers).
+ *
+ * For each of the dd->ipath_pioavregs registers, the shadow entry is
+ * rebuilt from the little-endian DMA copy (ipath_pioavailregs_dma),
+ * with the BUSY bit forced on for every buffer that is not marked in
+ * ipath_pioavailkernel. The whole walk runs under ipath_pioavail_lock
+ * taken with spin_lock_irqsave(). A debug message is emitted only for
+ * entries whose value actually changed.
+ *
+ * Invoked from no_pio_bufs(), after its debug dump.
+ *
+ * NOTE(review): oldval is a u64 printed with %Lx and no cast, while
+ * other debug prints in this file cast to unsigned long long for
+ * %llx -- confirm this is warning-free on every supported arch.
+ */
+static void ipath_reset_availshadow(struct ipath_devdata *dd)
+{
+ int i, im;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (i = 0; i < dd->ipath_pioavregs; i++) {
+ u64 val, oldval;
+ /*
+ * deal with 6110 chip bug on high register #s: when
+ * IPATH_SWAP_PIOBUFS is set, registers above index 3 are
+ * read from the pair-swapped (i ^ 1) slot of the DMA copy
+ */
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
+ /*
+ * busy out the buffers not in the kernel avail list,
+ * without changing the generation bits.
+ * The inverted kernel-avail mask is shifted up to the
+ * BUSY bit position and masked so that only BUSY bits
+ * can be OR'ed into the value read from the DMA copy.
+ */
+ oldval = dd->ipath_pioavailshadow[i];
+ dd->ipath_pioavailshadow[i] = val |
+ ((~dd->ipath_pioavailkernel[i] <<
+ INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
+ 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
+ if (oldval != dd->ipath_pioavailshadow[i])
+ ipath_dbg("shadow[%d] was %Lx, now %lx\n",
+ i, oldval, dd->ipath_pioavailshadow[i]);
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
/**
* ipath_setrcvhdrsize - set the receive header size
* @dd: the infinipath device
@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
*/
ipath_stats.sps_nopiobufs++;
if (!(++dd->ipath_consec_nopiobuf % 100000)) {
- ipath_dbg("%u pio sends with no bufavail; dmacopy: "
- "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n",
+ ipath_force_pio_avail_update(dd); /* at start */
+ ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
dd->ipath_consec_nopiobuf,
+ (unsigned long)get_cycles(),
(unsigned long long) le64_to_cpu(dma[0]),
(unsigned long long) le64_to_cpu(dma[1]),
(unsigned long long) le64_to_cpu(dma[2]),
@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
*/
if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
(sizeof(shadow[0]) * 4 * 4))
- ipath_dbg("2nd group: dmacopy: %llx %llx "
- "%llx %llx; shadow: %lx %lx %lx %lx\n",
+ ipath_dbg("2nd group: dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
(unsigned long long)le64_to_cpu(dma[4]),
(unsigned long long)le64_to_cpu(dma[5]),
(unsigned long long)le64_to_cpu(dma[6]),
(unsigned long long)le64_to_cpu(dma[7]),
- shadow[4], shadow[5], shadow[6],
- shadow[7]);
+ shadow[4], shadow[5], shadow[6], shadow[7]);
+
+ /* at end, so update likely happened */
+ ipath_reset_availshadow(dd);
}
}
@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
- unsigned end;
+ unsigned end, cnt = 0, next;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
- len *= 2;
- end = start + len;
+ end = start + len * 2;
- /* Set or clear the generation bits. */
spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ /* Set or clear the busy bit in the shadow. */
while (start < end) {
if (avail) {
- __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
- dd->ipath_pioavailshadow);
+ unsigned long dma;
+ int i, im;
+ /*
+ * the BUSY bit will never be set, because we disarm
+ * the user buffers before we hand them back to the
+ * kernel. We do have to make sure the generation
+ * bit is set correctly in shadow, since it could
+ * have changed many times while allocated to user.
+ * We can't use the bitmap functions on the full
+ * dma array because it is always little-endian, so
+ * we have to flip to host-order first.
+ * BITS_PER_LONG is slightly wrong, since it's
+ * always 64 bits per register in chip...
+ * We only work on 64 bit kernels, so that's OK.
+ */
+ /* deal with 6110 chip bug on high register #s */
+ i = start / BITS_PER_LONG;
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ dma = (unsigned long) le64_to_cpu(
+ dd->ipath_pioavailregs_dma[im]);
+ if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start) % BITS_PER_LONG, &dma))
+ __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ else
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
__set_bit(start, dd->ipath_pioavailkernel);
} else {
__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
}
start += 2;
}
+
+ if (dd->ipath_pioupd_thresh) {
+ end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+ next = find_first_bit(dd->ipath_pioavailkernel, end);
+ while (next < end) {
+ cnt++;
+ next = find_next_bit(dd->ipath_pioavailkernel, end,
+ next + 1);
+ }
+ }
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+ /*
+ * When moving buffers from kernel to user, if number assigned to
+ * the user is less than the pio update threshold, and threshold
+ * is supported (cnt was computed > 0), drop the update threshold
+ * so we update at least once per allocated number of buffers.
+ * In any case, if the kernel buffers are less than the threshold,
+ * drop the threshold. We don't bother increasing it, having once
+ * decreased it, since it would typically just cycle back and forth.
+ * If we don't decrease below buffers in use, we can wait a long
+ * time for an update, until some other context uses PIO buffers.
+ */
+ if (!avail && len < cnt)
+ cnt = len;
+ if (cnt < dd->ipath_pioupd_thresh) {
+ dd->ipath_pioupd_thresh = cnt;
+ ipath_dbg("Decreased pio update threshold to %u\n",
+ dd->ipath_pioupd_thresh);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+ << INFINIPATH_S_UPDTHRESH_SHIFT);
+ dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+ << INFINIPATH_S_UPDTHRESH_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
}
/**
@@ -1790,12 +1894,12 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
*/
if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
int skip_cancel;
- u64 *statp = &dd->ipath_sdma_status;
+ unsigned long *statp = &dd->ipath_sdma_status;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
skip_cancel =
- !test_bit(IPATH_SDMA_DISABLED, statp) &&
- test_and_set_bit(IPATH_SDMA_ABORTING, statp);
+ test_and_set_bit(IPATH_SDMA_ABORTING, statp)
+ && !test_bit(IPATH_SDMA_DISABLED, statp);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (skip_cancel)
goto bail;
@@ -1826,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
ipath_disarm_piobufs(dd, 0,
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+
if (restore_sendctrl) {
/* else done by caller later if needed */
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -1845,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
/* only wait so long for intr */
dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
dd->ipath_sdma_reset_wait = 200;
- __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
@@ -2510,7 +2616,7 @@ int ipath_reset_device(int unit)
ipath_dbg("unit %u port %d is in use "
"(PID %u cmd %s), can't reset\n",
unit, i,
- dd->ipath_pd[i]->port_pid,
+ pid_nr(dd->ipath_pd[i]->port_pid),
dd->ipath_pd[i]->port_comm);
ret = -EBUSY;
goto bail;
@@ -2548,19 +2654,21 @@ bail:
static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
{
int i, sub, any = 0;
- pid_t pid;
+ struct pid *pid;
if (!dd->ipath_pd)
return 0;
for (i = 1; i < dd->ipath_cfgports; i++) {
- if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
- !dd->ipath_pd[i]->port_pid)
+ if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
continue;
pid = dd->ipath_pd[i]->port_pid;
+ if (!pid)
+ continue;
+
dev_info(&dd->pcidev->dev, "context %d in use "
"(PID %u), sending signal %d\n",
- i, pid, sig);
- kill_proc(pid, sig, 1);
+ i, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
any++;
for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
pid = dd->ipath_pd[i]->port_subpid[sub];
@@ -2568,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
continue;
dev_info(&dd->pcidev->dev, "sub-context "
"%d:%d in use (PID %u), sending "
- "signal %d\n", i, sub, pid, sig);
- kill_proc(pid, sig, 1);
+ "signal %d\n", i, sub, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
any++;
}
}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 8b1752202e7..56c0eda3c07 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -39,6 +39,7 @@
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/jiffies.h>
+#include <linux/smp_lock.h>
#include <asm/pgtable.h>
#include "ipath_kernel.h"
@@ -173,47 +174,25 @@ static int ipath_get_base_info(struct file *fp,
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
if (!shared) {
- kinfo->spi_piocnt = dd->ipath_pbufsport;
+ kinfo->spi_piocnt = pd->port_piocnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs;
kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_ureg_align * pd->port_port;
} else if (master) {
- kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
- (dd->ipath_pbufsport % subport_cnt);
+ kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
+ (pd->port_piocnt % subport_cnt);
/* Master's PIO buffers are after all the slave's */
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign *
- (dd->ipath_pbufsport - kinfo->spi_piocnt);
+ (pd->port_piocnt - kinfo->spi_piocnt);
} else {
unsigned slave = subport_fp(fp) - 1;
- kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+ kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign * kinfo->spi_piocnt * slave;
}
- /*
- * Set the PIO avail update threshold to no larger
- * than the number of buffers per process. Note that
- * we decrease it here, but won't ever increase it.
- */
- if (dd->ipath_pioupd_thresh &&
- kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
- unsigned long flags;
-
- dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
- ipath_dbg("Decreased pio update threshold to %u\n",
- dd->ipath_pioupd_thresh);
- spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
- dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
- << INFINIPATH_S_UPDTHRESH_SHIFT);
- dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
- << INFINIPATH_S_UPDTHRESH_SHIFT;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
- spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
- }
-
if (shared) {
kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_ureg_align * pd->port_port;
@@ -577,7 +556,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
p = dd->ipath_pageshadow[porttid + tid];
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
- pd->port_pid, tid);
+ pid_nr(pd->port_pid), tid);
dd->ipath_f_put_tid(dd, &tidbase[tid],
RCVHQ_RCV_TYPE_EXPECTED,
dd->ipath_tidinvalid);
@@ -1309,19 +1288,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
if (!pd->port_subport_cnt) {
/* port is not shared */
- piocnt = dd->ipath_pbufsport;
+ piocnt = pd->port_piocnt;
piobufs = pd->port_piobufs;
} else if (!subport_fp(fp)) {
/* caller is the master */
- piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
- (dd->ipath_pbufsport % pd->port_subport_cnt);
+ piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
+ (pd->port_piocnt % pd->port_subport_cnt);
piobufs = pd->port_piobufs +
- dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+ dd->ipath_palign * (pd->port_piocnt - piocnt);
} else {
unsigned slave = subport_fp(fp) - 1;
/* caller is a slave */
- piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+ piocnt = pd->port_piocnt / pd->port_subport_cnt;
piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
}
@@ -1631,11 +1610,8 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
port);
pd->port_cnt = 1;
port_fp(fp) = pd;
- pd->port_pid = current->pid;
+ pd->port_pid = get_pid(task_pid(current));
strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
- ipath_chg_pioavailkernel(dd,
- dd->ipath_pbufsport * (pd->port_port - 1),
- dd->ipath_pbufsport, 0);
ipath_stats.sps_ports++;
ret = 0;
} else
@@ -1818,14 +1794,15 @@ static int find_shared_port(struct file *fp,
}
port_fp(fp) = pd;
subport_fp(fp) = pd->port_cnt++;
- pd->port_subpid[subport_fp(fp)] = current->pid;
+ pd->port_subpid[subport_fp(fp)] =
+ get_pid(task_pid(current));
tidcursor_fp(fp) = 0;
pd->active_slaves |= 1 << subport_fp(fp);
ipath_cdbg(PROC,
"%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
current->comm, current->pid,
subport_fp(fp),
- pd->port_comm, pd->port_pid,
+ pd->port_comm, pid_nr(pd->port_pid),
dd->ipath_unit, pd->port_port);
ret = 1;
goto done;
@@ -1839,6 +1816,7 @@ done:
static int ipath_open(struct inode *in, struct file *fp)
{
/* The real work is performed later in ipath_assign_port() */
+ cycle_kernel_lock();
fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
return fp->private_data ? 0 : -ENOMEM;
}
@@ -1938,11 +1916,25 @@ static int ipath_do_user_init(struct file *fp,
/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+ /* some ports may get extra buffers, calculate that here */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_piocnt = dd->ipath_pbufsport + 1;
+ else
+ pd->port_piocnt = dd->ipath_pbufsport;
+
/* for right now, kernel piobufs are at end, so port 1 is at 0 */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_pio_base = (dd->ipath_pbufsport + 1)
+ * (pd->port_port - 1);
+ else
+ pd->port_pio_base = dd->ipath_ports_extrabuf +
+ dd->ipath_pbufsport * (pd->port_port - 1);
pd->port_piobufs = dd->ipath_piobufbase +
- dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
- ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
- pd->port_port, pd->port_piobufs);
+ pd->port_pio_base * dd->ipath_palign;
+ ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
+ " first pio %u\n", pd->port_port, pd->port_piobufs,
+ pd->port_piocnt, pd->port_pio_base);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
/*
* Now allocate the rcvhdr Q and eager TIDs; skip the TID
@@ -2077,7 +2069,8 @@ static int ipath_close(struct inode *in, struct file *fp)
* the slave(s) don't wait for receive data forever.
*/
pd->active_slaves &= ~(1 << fd->subport);
- pd->port_subpid[fd->subport] = 0;
+ put_pid(pd->port_subpid[fd->subport]);
+ pd->port_subpid[fd->subport] = NULL;
mutex_unlock(&ipath_mutex);
goto bail;
}
@@ -2085,7 +2078,7 @@ static int ipath_close(struct inode *in, struct file *fp)
if (pd->port_hdrqfull) {
ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
- "during run\n", pd->port_comm, pd->port_pid,
+ "during run\n", pd->port_comm, pid_nr(pd->port_pid),
pd->port_hdrqfull);
pd->port_hdrqfull = 0;
}
@@ -2107,7 +2100,6 @@ static int ipath_close(struct inode *in, struct file *fp)
}
if (dd->ipath_kregbase) {
- int i;
/* atomically clear receive enable port and intr avail. */
clear_bit(dd->ipath_r_portenable_shift + port,
&dd->ipath_rcvctrl);
@@ -2136,9 +2128,9 @@ static int ipath_close(struct inode *in, struct file *fp)
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, dd->ipath_dummy_hdrq_phys);
- i = dd->ipath_pbufsport * (port - 1);
- ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
- ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
+ ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base,
+ pd->port_piocnt, 1);
dd->ipath_f_clear_tids(dd, pd->port_port);
@@ -2146,11 +2138,12 @@ static int ipath_close(struct inode *in, struct file *fp)
unlock_expected_tids(pd);
ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
- pd->port_comm, pd->port_pid,
+ pd->port_comm, pid_nr(pd->port_pid),
dd->ipath_unit, port);
}
- pd->port_pid = 0;
+ put_pid(pd->port_pid);
+ pd->port_pid = NULL;
dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
@@ -2462,7 +2455,7 @@ static int init_cdev(int minor, char *name, const struct file_operations *fops,
goto err_cdev;
}
- device = device_create(ipath_class, NULL, dev, name);
+ device = device_create_drvdata(ipath_class, NULL, dev, NULL, name);
if (IS_ERR(device)) {
ret = PTR_ERR(device);
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index e3ec0d1bdf5..fb70712ac85 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
dev_info(&dd->pcidev->dev,
"Recovering from TXE PIO parity error\n");
- ipath_disarm_senderrbufs(dd, 1);
+ ipath_disarm_senderrbufs(dd);
}
@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
/*
- * Parity errors in send memory are recoverable,
- * just cancel the send (if indicated in * sendbuffererror),
- * count the occurrence, unfreeze (if no other handled
- * hardware error bits are set), and continue.
+ * Parity errors in send memory are recoverable by h/w;
+ * just do housekeeping, exit freeze mode and continue.
*/
if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) {
- /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- goto bail;
- }
}
if (hwerrs) {
/*
@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
dd->ipath_flags &= ~IPATH_INITTED;
} else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
ipath_clear_freeze(dd);
}
}
@@ -870,8 +861,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
"revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
- } else if (dd->ipath_minrev == 1) {
- /* Rev1 chips are prototype. Complain, but allow use */
+ } else if (dd->ipath_minrev == 1 &&
+ !(dd->ipath_flags & IPATH_INITTED)) {
+ /* Rev1 chips are prototype. Complain at init, but allow use */
ipath_dev_err(dd, "Unsupported hardware "
"revision %u.%u, Contact support@qlogic.com\n",
dd->ipath_majrev, dd->ipath_minrev);
@@ -1966,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
dd->ipath_rcvctrl);
dd->ipath_p0_rcvegrcnt = 2048; /* always */
if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
- dd->ipath_pioreserved = 1; /* reserve a buffer */
+ dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
}
@@ -2236,8 +2228,8 @@ static void ipath_autoneg_send(struct ipath_devdata *dd, int which)
0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x40000001, 0x1388, 0x15e, /* rest 0's */
};
- dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]);
- hcnt = sizeof(hdr)/sizeof(hdr[0]);
+ dcnt = ARRAY_SIZE(madpayload_start);
+ hcnt = ARRAY_SIZE(hdr);
if (!swapped) {
/* for maintainability, do it at runtime */
for (i = 0; i < hcnt; i++) {
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 27dd8947666..3e5baa43fc8 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -41,7 +41,7 @@
/*
* min buffers we want to have per port, after driver
*/
-#define IPATH_MIN_USER_PORT_BUFCNT 8
+#define IPATH_MIN_USER_PORT_BUFCNT 7
/*
* Number of ports we are configured to use (to allow for more pio
@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
/*
* Number of buffers reserved for driver (verbs and layered drivers.)
- * Reserved at end of buffer list. Initialized based on
- * number of PIO buffers if not set via module interface.
+ * Initialized based on number of PIO buffers if not set via module interface.
* The problem with this is that it's global, but we'll use different
- * numbers for different chip types. So the default value is not
- * very useful. I've redefined it for the 1.3 release so that it's
- * zero unless set by the user to something else, in which case we
- * try to respect it.
+ * numbers for different chip types.
*/
static ushort ipath_kpiobufs;
@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
else
pioavail = dd->ipath_pioavailregs_dma[i];
- dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) |
- (~dd->ipath_pioavailkernel[i] <<
- INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
+ /*
+ * don't need to worry about ipath_pioavailkernel here
+ * because we will call ipath_chg_pioavailkernel() later
+ * in initialization, to busy out buffers as needed
+ */
+ dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
}
/* can get counters, stats, etc. */
dd->ipath_flags |= IPATH_PRESENT;
@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
int ret = 0;
- u32 val32, kpiobufs;
+ u32 kpiobufs, defkbufs;
u32 piobufs, uports;
u64 val;
struct ipath_portdata *pd;
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
- unsigned long flags;
ret = init_housekeeping(dd, reinit);
if (ret)
@@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
- if (ipath_kpiobufs == 0) {
- /* not set by user (this is default) */
- if (piobufs > 144)
- kpiobufs = 32;
- else
- kpiobufs = 16;
- }
+ if (piobufs > 144)
+ defkbufs = 32 + dd->ipath_pioreserved;
else
- kpiobufs = ipath_kpiobufs;
+ defkbufs = 16 + dd->ipath_pioreserved;
- if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
+ if (ipath_kpiobufs && (ipath_kpiobufs +
+ (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
int i = (int) piobufs -
(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
if (i < 1)
i = 1;
dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
"%d for kernel leaves too few for %d user ports "
- "(%d each); using %u\n", kpiobufs,
+ "(%d each); using %u\n", ipath_kpiobufs,
piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
/*
* shouldn't change ipath_kpiobufs, because could be
* different for different devices...
*/
kpiobufs = i;
- }
+ } else if (ipath_kpiobufs)
+ kpiobufs = ipath_kpiobufs;
+ else
+ kpiobufs = defkbufs;
dd->ipath_lastport_piobuf = piobufs - kpiobufs;
dd->ipath_pbufsport =
uports ? dd->ipath_lastport_piobuf / uports : 0;
- val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
- if (val32 > 0) {
- ipath_dbg("allocating %u pbufs/port leaves %u unused, "
- "add to kernel\n", dd->ipath_pbufsport, val32);
- dd->ipath_lastport_piobuf -= val32;
- kpiobufs += val32;
- ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
- dd->ipath_pbufsport, val32);
- }
+ /* if not an even divisor, some user ports get extra buffers */
+ dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
+ (dd->ipath_pbufsport * uports);
+ if (dd->ipath_ports_extrabuf)
+ ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
+ "ports <= %u\n", dd->ipath_pbufsport,
+ dd->ipath_ports_extrabuf);
dd->ipath_lastpioindex = 0;
dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
- ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
+ /* ipath_pioavailshadow initialized earlier */
ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
"each for %u user ports\n", kpiobufs,
piobufs, dd->ipath_pbufsport, uports);
- if (dd->ipath_pioupd_thresh) {
- if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
- dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
- if (kpiobufs < dd->ipath_pioupd_thresh)
- dd->ipath_pioupd_thresh = kpiobufs;
- }
-
ret = dd->ipath_f_early_init(dd);
if (ret) {
ipath_dev_err(dd, "Early initialization failure\n");
@@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
}
/*
- * Cancel any possible active sends from early driver load.
- * Follows early_init because some chips have to initialize
- * PIO buffers in early_init to avoid false parity errors.
- */
- ipath_cancel_sends(dd, 0);
-
- /*
* Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
* done after early_init.
*/
@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
dd->ipath_pioavailregs_phys);
+
/*
* this is to detect s/w errors, which the h/w works around by
* ignoring the low 6 bits of address, if it wasn't aligned.
@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
- spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
- dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
/*
* before error clears, since we expect serdes pll errors during
* this, the first time after reset
@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
else
enable_chip(dd, reinit);
+ /* after enable_chip, so pioavailshadow setup */
+ ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
+
+ /*
+ * Cancel any possible active sends from early driver load.
+ * Follows early_init because some chips have to initialize
+ * PIO buffers in early_init to avoid false parity errors.
+ * After enable and ipath_chg_pioavailkernel so we can safely
+ * enable pioavail updates and PIOENABLE; packets are now
+ * ready to go out.
+ */
+ ipath_cancel_sends(dd, 1);
+
if (!reinit) {
/*
* Used when we close a port, for DMA already in flight
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 1b58f4737c7..26900b3b7a4 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,42 +38,12 @@
#include "ipath_verbs.h"
#include "ipath_common.h"
-/*
- * clear (write) a pio buffer, to clear a parity error. This routine
- * should only be called when in freeze mode, and the buffer should be
- * canceled afterwards.
- */
-static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
-{
- u32 __iomem *pbuf;
- u32 dwcnt; /* dword count to write */
- if (pnum < dd->ipath_piobcnt2k) {
- pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
- dd->ipath_palign);
- dwcnt = dd->ipath_piosize2k >> 2;
- }
- else {
- pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
- (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
- dwcnt = dd->ipath_piosize4k >> 2;
- }
- dev_info(&dd->pcidev->dev,
- "Rewrite PIO buffer %u, to recover from parity error\n",
- pnum);
-
- /* no flush required, since already in freeze */
- writel(dwcnt + 1, pbuf);
- while (--dwcnt)
- writel(0, pbuf++);
-}
/*
* Called when we might have an error that is specific to a particular
* PIO buffer, and may need to cancel that buffer, so it can be re-used.
- * If rewrite is true, and bits are set in the sendbufferror registers,
- * we'll write to the buffer, for error recovery on parity errors.
*/
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
{
u32 piobcnt;
unsigned long sbuf[4];
@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
}
for (i = 0; i < piobcnt; i++)
- if (test_bit(i, sbuf)) {
- if (rewrite)
- ipath_clrpiobuf(dd, i);
+ if (test_bit(i, sbuf))
ipath_disarm_piobufs(dd, i, 1);
- }
/* ignore armlaunch errs for a bit */
dd->ipath_lastcancel = jiffies+3;
}
@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
u64 ignore_this_time = 0;
- ipath_disarm_senderrbufs(dd, 0);
+ ipath_disarm_senderrbufs(dd);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* processes (causing armlaunch), send errors due to going into freeze mode,
* etc., and try to avoid causing extra interrupts while doing so.
* Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it for anything changing while in freeze mode
- * (we don't want to wait for the next pio buffer state change).
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
* Make sure that we don't lose any important interrupts by using the chip
* feature that says that writing 0 to a bit in *clear that is set in
* *status will cause an interrupt to be generated again (if allowed by
@@ -918,44 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
*/
void ipath_clear_freeze(struct ipath_devdata *dd)
{
- int i, im;
- u64 val;
-
/* disable error interrupts, to avoid confusion */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
/* also disable interrupts; errormask is sometimes overwriten */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
- /*
- * clear all sends, because they have may been
- * completed by usercode while in freeze mode, and
- * therefore would not be sent, and eventually
- * might cause the process to run out of bufs
- */
- ipath_cancel_sends(dd, 0);
+ ipath_cancel_sends(dd, 1);
+
+ /* clear the freeze, and be sure chip saw it */
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
dd->ipath_control);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- /* ensure pio avail updates continue */
+ /* force in-memory update now we are out of freeze */
ipath_force_pio_avail_update(dd);
/*
- * We just enabled pioavailupdate, so dma copy is almost certainly
- * not yet right, so read the registers directly. Similar to init
- */
- for (i = 0; i < dd->ipath_pioavregs; i++) {
- /* deal with 6110 chip bug */
- im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
- i ^ 1 : i;
- val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
- dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
- dd->ipath_pioavailshadow[i] = val |
- (~dd->ipath_pioavailkernel[i] <<
- INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
- }
-
- /*
* force new interrupt if any hwerr, error or interrupt bits are
* still set, and clear "safe" send packet errors related to freeze
* and cancelling sends. Re-enable error interrupts before possible
@@ -1312,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
- if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
- handle_layer_pioavail(dd);
- else
- ipath_dbg("unexpected BUFAVAIL intr\n");
+ /* always process; sdma verbs uses PIO for acks and VL15 */
+ handle_layer_pioavail(dd);
}
ret = IRQ_HANDLED;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 202337ae90d..0bd8bcb184a 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -117,6 +117,10 @@ struct ipath_portdata {
u16 port_subport_cnt;
/* non-zero if port is being shared. */
u16 port_subport_id;
+ /* number of pio bufs for this port (all procs, if shared) */
+ u32 port_piocnt;
+ /* first pio buffer for this port */
+ u32 port_pio_base;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -155,8 +159,8 @@ struct ipath_portdata {
/* saved total number of polled urgent packets for poll edge trigger */
u32 port_urgent_poll;
/* pid of process using this port */
- pid_t port_pid;
- pid_t port_subpid[INFINIPATH_MAX_SUBPORT];
+ struct pid *port_pid;
+ struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
/* same size as task_struct .comm[] */
char port_comm[16];
/* pkeys set by this use of this port */
@@ -228,6 +232,11 @@ struct ipath_sdma_desc {
#define IPATH_SDMA_TXREQ_S_ABORTED 2
#define IPATH_SDMA_TXREQ_S_SHUTDOWN 3
+#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63)
+#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62)
+#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61)
+#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30)
+
/* max dwords in small buffer packet */
#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
@@ -384,6 +393,8 @@ struct ipath_devdata {
u32 ipath_lastrpkts;
/* pio bufs allocated per port */
u32 ipath_pbufsport;
+ /* if remainder on bufs/port, ports < extrabuf get 1 extra */
+ u32 ipath_ports_extrabuf;
u32 ipath_pioupd_thresh; /* update threshold, some chips */
/*
* number of ports configured as max; zero is set to number chip
@@ -477,7 +488,7 @@ struct ipath_devdata {
/* SendDMA related entries */
spinlock_t ipath_sdma_lock;
- u64 ipath_sdma_status;
+ unsigned long ipath_sdma_status;
unsigned long ipath_sdma_abort_jiffies;
unsigned long ipath_sdma_abort_intr_timeout;
unsigned long ipath_sdma_buf_jiffies;
@@ -816,8 +827,8 @@ struct ipath_devdata {
#define IPATH_SDMA_DISARMED 1
#define IPATH_SDMA_DISABLED 2
#define IPATH_SDMA_LAYERBUF 3
-#define IPATH_SDMA_RUNNING 62
-#define IPATH_SDMA_SHUTDOWN 63
+#define IPATH_SDMA_RUNNING 30
+#define IPATH_SDMA_SHUTDOWN 31
/* bit combinations that correspond to abort states */
#define IPATH_SDMA_ABORT_NONE 0
@@ -1011,7 +1022,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
int ipath_update_eeprom_log(struct ipath_devdata *dd);
void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
void ipath_force_pio_avail_update(struct ipath_devdata *);
void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 1ff46ae7dd9..be4fc9ada8e 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -111,9 +111,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
nip->revision = cpu_to_be32((majrev << 16) | minrev);
nip->local_port_num = port;
vendor = dd->ipath_vendorid;
- nip->vendor_id[0] = 0;
- nip->vendor_id[1] = vendor >> 8;
- nip->vendor_id[2] = vendor;
+ nip->vendor_id[0] = IPATH_SRC_OUI_1;
+ nip->vendor_id[1] = IPATH_SRC_OUI_2;
+ nip->vendor_id[2] = IPATH_SRC_OUI_3;
return reply(smp);
}
@@ -1492,6 +1492,10 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
goto bail;
}
+ case IB_MGMT_METHOD_TRAP:
+ case IB_MGMT_METHOD_REPORT:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ case IB_MGMT_METHOD_TRAP_REPRESS:
case IB_MGMT_METHOD_GET_RESP:
/*
* The ib_mad module will call us to process responses
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index dd5b6e9d57c..4715911101e 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{
struct ipath_qp *q, **qpp;
unsigned long flags;
- int fnd = 0;
spin_lock_irqsave(&qpt->lock, flags);
@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
*qpp = qp->next;
qp->next = NULL;
atomic_dec(&qp->refcount);
- fnd = 1;
break;
}
}
spin_unlock_irqrestore(&qpt->lock, flags);
-
- if (!fnd)
- return;
-
- free_qpn(qpt, qp->ibqp.qp_num);
-
- wait_event(qp->wait, !atomic_read(&qp->refcount));
}
/**
- * ipath_free_all_qps - remove all QPs from the table
+ * ipath_free_all_qps - check for QPs still in use
* @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
*/
-void ipath_free_all_qps(struct ipath_qp_table *qpt)
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
{
unsigned long flags;
- struct ipath_qp *qp, *nqp;
- u32 n;
+ struct ipath_qp *qp;
+ u32 n, qp_inuse = 0;
+ spin_lock_irqsave(&qpt->lock, flags);
for (n = 0; n < qpt->max; n++) {
- spin_lock_irqsave(&qpt->lock, flags);
qp = qpt->table[n];
qpt->table[n] = NULL;
- spin_unlock_irqrestore(&qpt->lock, flags);
-
- while (qp) {
- nqp = qp->next;
- free_qpn(qpt, qp->ibqp.qp_num);
- if (!atomic_dec_and_test(&qp->refcount) ||
- !ipath_destroy_qp(&qp->ibqp))
- ipath_dbg("QP memory leak!\n");
- qp = nqp;
- }
+
+ for (; qp; qp = qp->next)
+ qp_inuse++;
}
+ spin_unlock_irqrestore(&qpt->lock, flags);
- for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
+ for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
if (qpt->map[n].page)
- free_page((unsigned long)qpt->map[n].page);
- }
+ free_page((unsigned long) qpt->map[n].page);
+ return qp_inuse;
}
/**
@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
- qp->s_busy = 0;
+ atomic_set(&qp->s_dma_busy, 0);
qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_wqe = NULL;
qp->s_pkt_delay = 0;
+ qp->s_draining = 0;
qp->s_psn = 0;
qp->r_psn = 0;
qp->r_msn = 0;
@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
- qp->r_wrid_valid = 0;
+ qp->r_aflags = 0;
+ qp->r_flags = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
- qp->s_wait_credit = 0;
memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0;
@@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
}
- qp->r_reuse_sge = 0;
}
/**
- * ipath_error_qp - put a QP into an error state
- * @qp: the QP to put into an error state
+ * ipath_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
* @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
* Returns true if last WQE event should be generated.
* The QP s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
*/
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
struct ib_wc wc;
int ret = 0;
- ipath_dbg("QP%d/%d in error state (%d)\n",
- qp->ibqp.qp_num, qp->remote_qpn, err);
+ if (qp->state == IB_QPS_ERR)
+ goto bail;
+
+ qp->state = IB_QPS_ERR;
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
@@ -399,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
+ /* Schedule the sending tasklet to drain the send work queue. */
+ if (qp->s_last != qp->s_head)
+ ipath_schedule_send(qp);
+
+ memset(&wc, 0, sizeof(wc));
wc.qp = &qp->ibqp;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- if (qp->r_wrid_valid) {
- qp->r_wrid_valid = 0;
+ wc.opcode = IB_WC_RECV;
+
+ if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
wc.wr_id = qp->r_wr_id;
- wc.opcode = IB_WC_RECV;
wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
}
wc.status = IB_WC_WR_FLUSH_ERR;
- while (qp->s_last != qp->s_head) {
- struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
- }
- qp->s_cur = qp->s_tail = qp->s_head;
- qp->s_hdrwords = 0;
- qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-
if (qp->r_rq.wq) {
struct ipath_rwq *wq;
u32 head;
@@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
tail = wq->tail;
if (tail >= qp->r_rq.size)
tail = 0;
- wc.opcode = IB_WC_RECV;
while (tail != head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
if (++tail >= qp->r_rq.size)
@@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
} else if (qp->ibqp.event_handler)
ret = 1;
+bail:
return ret;
}
@@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state;
- unsigned long flags;
int lastwqe = 0;
int ret;
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irq(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
@@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
switch (new_state) {
case IB_QPS_RESET:
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ spin_lock_irq(&qp->s_lock);
+ }
ipath_reset_qp(qp, ibqp->qp_type);
break;
+ case IB_QPS_SQD:
+ qp->s_draining = qp->s_last != qp->s_cur;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQE:
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ goto inval;
+ qp->state = new_state;
+ break;
+
case IB_QPS_ERR:
lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
+ qp->state = new_state;
break;
-
}
if (attr_mask & IB_QP_PKEY_INDEX)
@@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
qp->s_max_rd_atomic = attr->max_rd_atomic;
- qp->state = new_state;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irq(&qp->s_lock);
if (lastwqe) {
struct ib_event ev;
@@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto bail;
inval:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irq(&qp->s_lock);
ret = -EINVAL;
bail:
@@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->pkey_index = qp->s_pkey_index;
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
- attr->sq_draining = 0;
+ attr->sq_draining = qp->s_draining;
attr->max_rd_atomic = qp->s_max_rd_atomic;
attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer;
@@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
+ init_waitqueue_head(&qp->wait_dma);
tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait);
@@ -926,6 +924,7 @@ bail_ip:
else
vfree(qp->r_rq.wq);
ipath_free_qp(&dev->qp_table, qp);
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
bail_qp:
kfree(qp);
bail_swq:
@@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
- unsigned long flags;
- spin_lock_irqsave(&qp->s_lock, flags);
- qp->state = IB_QPS_ERR;
- spin_unlock_irqrestore(&qp->s_lock, flags);
- spin_lock(&dev->n_qps_lock);
- dev->n_qps_allocated--;
- spin_unlock(&dev->n_qps_lock);
+ /* Make sure HW and driver activity is stopped. */
+ spin_lock_irq(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ } else
+ spin_unlock_irq(&qp->s_lock);
- /* Stop the sending tasklet. */
- tasklet_kill(&qp->s_task);
+ ipath_free_qp(&dev->qp_table, qp);
if (qp->s_tx) {
atomic_dec(&qp->refcount);
if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
kfree(qp->s_tx->txreq.map_addr);
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
+ spin_unlock_irq(&dev->pending_lock);
+ qp->s_tx = NULL;
}
- /* Make sure the QP isn't on the timeout list. */
- spin_lock_irqsave(&dev->pending_lock, flags);
- if (!list_empty(&qp->timerwait))
- list_del_init(&qp->timerwait);
- if (!list_empty(&qp->piowait))
- list_del_init(&qp->piowait);
- if (qp->s_tx)
- list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
- /*
- * Make sure that the QP is not in the QPN table so receive
- * interrupts will discard packets for this QP. XXX Also remove QP
- * from multicast table.
- */
- if (atomic_read(&qp->refcount) != 0)
- ipath_free_qp(&dev->qp_table, qp);
+ /* all user's cleaned up, mark it available */
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+ spin_lock(&dev->n_qps_lock);
+ dev->n_qps_allocated--;
+ spin_unlock(&dev->n_qps_lock);
if (qp->ip)
kref_put(&qp->ip->ref, ipath_release_mmap_info);
@@ -1026,48 +1028,6 @@ bail:
}
/**
- * ipath_sqerror_qp - put a QP's send queue into an error state
- * @qp: QP who's send queue will be put into an error state
- * @wc: the WC responsible for putting the QP in this state
- *
- * Flushes the send work queue.
- * The QP s_lock should be held and interrupts disabled.
- */
-
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
-{
- struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
- ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
- qp->ibqp.qp_num, qp->remote_qpn, wc->status);
-
- spin_lock(&dev->pending_lock);
- if (!list_empty(&qp->timerwait))
- list_del_init(&qp->timerwait);
- if (!list_empty(&qp->piowait))
- list_del_init(&qp->piowait);
- spin_unlock(&dev->pending_lock);
-
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
-
- wc->status = IB_WC_WR_FLUSH_ERR;
-
- while (qp->s_last != qp->s_head) {
- wqe = get_swqe_ptr(qp, qp->s_last);
- wc->wr_id = wqe->wr.wr_id;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- }
- qp->s_cur = qp->s_tail = qp->s_head;
- qp->state = IB_QPS_SQE;
-}
-
-/**
* ipath_get_credit - flush the send work queue of a QP
* @qp: the qp who's send work queue to flush
* @aeth: the Acknowledge Extended Transport Header
@@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
}
/* Restart sending if it was blocked due to lack of credits. */
- if (qp->s_cur != qp->s_head &&
+ if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
+ qp->s_cur != qp->s_head &&
(qp->s_lsn == (u32) -1 ||
ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
qp->s_lsn + 1) <= 0))
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index c405dfba553..97710522624 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
u32 bth0;
u32 bth2;
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto bail;
+
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_rc_ack(dev, qp, ohdr, pmtu))
goto done;
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
- qp->s_rnr_timeout || qp->s_wait_credit)
- goto bail;
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
- /* Limit the number of packets sent without an ACK. */
- if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
- qp->s_wait_credit = 1;
- dev->n_rc_stalls++;
+ /* Leave BUSY set until RNR timeout. */
+ if (qp->s_rnr_timeout) {
+ qp->s_flags |= IPATH_S_WAITING;
goto bail;
}
@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
wqe = get_swqe_ptr(qp, qp->s_cur);
switch (qp->s_state) {
default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
/*
* Resend an old request or start a new one.
*
@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail;
+ }
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail;
+ }
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
ret = 1;
+ goto unlock;
+
bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, flags);
+ /* Don't try to send ACKs if the link isn't ACTIVE */
dd = dev->dd;
+ if (!(dd->ipath_flags & IPATH_LINKACTIVE))
+ goto done;
+
piobuf = ipath_getpiobuf(dd, 0, NULL);
if (!piobuf) {
/*
@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
goto done;
queue_ack:
- dev->n_rc_qacks++;
- qp->s_flags |= IPATH_S_ACK_PENDING;
- qp->s_nak_state = qp->r_nak_state;
- qp->s_ack_psn = qp->r_ack_psn;
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
+ dev->n_rc_qacks++;
+ qp->s_flags |= IPATH_S_ACK_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
+ }
spin_unlock_irqrestore(&qp->s_lock, flags);
-
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
-
done:
return;
}
@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
/*
* Set the state to restart in the middle of a request.
* Don't change the s_sge, s_cur_sge, or s_cur_size.
- * See ipath_do_rc_send().
+ * See ipath_make_rc_req().
*/
switch (opcode) {
case IB_WR_SEND:
@@ -771,27 +802,14 @@ done:
*
* The QP s_lock should be held and interrupts disabled.
*/
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
{
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
struct ipath_ibdev *dev;
if (qp->s_retry == 0) {
- wc->wr_id = wqe->wr.wr_id;
- wc->status = IB_WC_RETRY_EXC_ERR;
- wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc->vendor_err = 0;
- wc->byte_len = 0;
- wc->qp = &qp->ibqp;
- wc->imm_data = 0;
- wc->src_qp = qp->remote_qpn;
- wc->wc_flags = 0;
- wc->pkey_index = 0;
- wc->slid = qp->remote_ah_attr.dlid;
- wc->sl = qp->remote_ah_attr.sl;
- wc->dlid_path_bits = 0;
- wc->port_num = 0;
- ipath_sqerror_qp(qp, wc);
+ ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
goto bail;
}
qp->s_retry--;
@@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
spin_lock(&dev->pending_lock);
if (!list_empty(&qp->timerwait))
list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
if (wqe->wr.opcode == IB_WR_RDMA_READ)
@@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
bail:
return;
@@ -820,13 +840,7 @@ bail:
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
- if (qp->s_last_psn != psn) {
- qp->s_last_psn = psn;
- if (qp->s_wait_credit) {
- qp->s_wait_credit = 0;
- tasklet_hi_schedule(&qp->s_task);
- }
- }
+ qp->s_last_psn = psn;
}
/**
@@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
+ enum ib_wc_status status;
struct ipath_swqe *wqe;
int ret = 0;
u32 ack_psn;
@@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
*/
update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */
- ipath_restart_rc(qp, wqe->psn, &wc);
+ ipath_restart_rc(qp, wqe->psn);
/*
* No need to process the ACK/NAK since we are
* restarting an earlier request.
@@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
qp->s_num_rd_atomic--;
/* Restart sending task if fence is complete */
- if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
- !qp->s_num_rd_atomic) {
- qp->s_flags &= ~IPATH_S_FENCE_PENDING;
- tasklet_hi_schedule(&qp->s_task);
- } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
- qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
- tasklet_hi_schedule(&qp->s_task);
- }
+ if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+ !qp->s_num_rd_atomic) ||
+ qp->s_flags & IPATH_S_RDMAR_PENDING)
+ ipath_schedule_send(qp);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
wc.byte_len = wqe->length;
- wc.imm_data = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
}
qp->s_retry = qp->s_retry_cnt;
@@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
} else {
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
+ if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
+ qp->s_draining = 0;
if (qp->s_last == qp->s_tail)
break;
wqe = get_swqe_ptr(qp, qp->s_last);
@@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
*/
if (ipath_cmp24(qp->s_psn, psn) <= 0) {
reset_psn(qp, psn + 1);
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
}
} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
qp->s_state = OP(SEND_LAST);
@@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_last == qp->s_tail)
goto bail;
if (qp->s_rnr_retry == 0) {
- wc.status = IB_WC_RNR_RETRY_EXC_ERR;
+ status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
}
if (qp->s_rnr_retry_cnt < 7)
@@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK];
ipath_insert_rnr_queue(qp);
+ ipath_schedule_send(qp);
goto bail;
case 3: /* NAK */
@@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
* RDMA READ response which terminates the RDMA
* READ.
*/
- ipath_restart_rc(qp, psn, &wc);
+ ipath_restart_rc(qp, psn);
break;
case 1: /* Invalid Request */
- wc.status = IB_WC_REM_INV_REQ_ERR;
+ status = IB_WC_REM_INV_REQ_ERR;
dev->n_other_naks++;
goto class_b;
case 2: /* Remote Access Error */
- wc.status = IB_WC_REM_ACCESS_ERR;
+ status = IB_WC_REM_ACCESS_ERR;
dev->n_other_naks++;
goto class_b;
case 3: /* Remote Operation Error */
- wc.status = IB_WC_REM_OP_ERR;
+ status = IB_WC_REM_OP_ERR;
dev->n_other_naks++;
class_b:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.qp = &qp->ibqp;
- wc.imm_data = 0;
- wc.src_qp = qp->remote_qpn;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_sqerror_qp(qp, &wc);
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
@@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
int header_in_data)
{
struct ipath_swqe *wqe;
+ enum ib_wc_status status;
unsigned long flags;
- struct ib_wc wc;
int diff;
u32 pad;
u32 aeth;
@@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto ack_done;
+
/* Ignore invalid responses. */
if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
goto ack_done;
@@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
if (unlikely(qp->s_last == qp->s_tail))
goto ack_done;
wqe = get_swqe_ptr(qp, qp->s_last);
+ status = IB_WC_SUCCESS;
switch (opcode) {
case OP(ACKNOWLEDGE):
@@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
wqe = get_swqe_ptr(qp, qp->s_last);
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
+ qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
/*
* If this is a response to a resent RDMA read, we
* have to be careful to copy the data to the right
@@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/* no AETH, no ACK */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done;
}
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/* ACKs READ req. */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
goto ack_done;
}
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto ack_done;
}
-ack_done:
- spin_unlock_irqrestore(&qp->s_lock, flags);
- goto bail;
-
ack_op_err:
- wc.status = IB_WC_LOC_QP_OP_ERR;
+ status = IB_WC_LOC_QP_OP_ERR;
goto ack_err;
ack_len_err:
- wc.status = IB_WC_LOC_LEN_ERR;
+ status = IB_WC_LOC_LEN_ERR;
ack_err:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_sqerror_qp(qp, &wc);
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
return;
@@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
psn &= IPATH_PSN_MASK;
e = NULL;
old_req = 1;
+
spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock_done;
+
for (i = qp->r_head_ack_queue; ; i = prev) {
if (i == qp->s_tail_ack_queue)
old_req = 0;
@@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
break;
}
qp->r_nak_state = 0;
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
unlock_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1523,13 +1522,12 @@ send_ack:
return 0;
}
-static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
unsigned long flags;
int lastwqe;
spin_lock_irqsave(&qp->s_lock, flags);
- qp->state = IB_QPS_ERR;
lastwqe = ipath_error_qp(qp, err);
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
{
- unsigned long flags;
unsigned next;
next = n + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
- spin_lock_irqsave(&qp->s_lock, flags);
if (n == qp->s_tail_ack_queue) {
qp->s_tail_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
- spin_unlock_irqrestore(&qp->s_lock, flags);
}
/**
@@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int diff;
struct ib_reth *reth;
int header_in_data;
+ unsigned long flags;
/* Validate the SLID. See Ch. 9.6.1.5 */
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
@@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
- nack_inv:
- ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
- qp->r_nak_state = IB_NAK_INVALID_REQUEST;
- qp->r_ack_psn = qp->r_psn;
- goto send_ack;
+ goto nack_inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
@@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
break;
}
- wc.imm_data = 0;
- wc.wc_flags = 0;
+ memset(&wc, 0, sizeof wc);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- if (!ipath_get_rwqe(qp, 0)) {
- rnr_nak:
- qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
- qp->r_ack_psn = qp->r_psn;
- goto send_ack;
- }
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
qp->r_rcv_len = 0;
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
@@ -1716,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
+ wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
- wc.imm_data = ohdr->u.imm_data;
+ wc.ex.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
@@ -1741,20 +1728,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++;
- if (!qp->r_wrid_valid)
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
break;
- qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
+ if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
@@ -1815,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
- goto nack_inv;
+ goto nack_inv_unlck;
ipath_update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
@@ -1838,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
- goto nack_acc;
+ goto nack_acc_unlck;
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
@@ -1865,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
- barrier();
qp->r_head_ack_queue = next;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
- goto done;
+ goto unlock;
}
case OP(COMPARE_SWAP):
@@ -1890,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
- goto nack_inv;
+ goto nack_inv_unlck;
ipath_update_ack_queue(qp, next);
}
if (!header_in_data)
@@ -1902,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1)))
- goto nack_inv;
+ goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
+ goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data);
@@ -1925,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
- barrier();
qp->r_head_ack_queue = next;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
- goto done;
+ goto unlock;
}
default:
@@ -1947,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto send_ack;
goto done;
+rnr_nak:
+ qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_inv_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_acc_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+ ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
-
send_ack:
send_rc_ack(qp);
+ goto done;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
done:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 8ac5c1d82cc..af051f75766 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP
*
+ * Called with the QP s_lock held and interrupts disabled.
* XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- unsigned long flags;
- spin_lock_irqsave(&dev->pending_lock, flags);
+ /* We already did a spin_lock_irqsave(), so just use spin_lock */
+ spin_lock(&dev->pending_lock);
if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait);
else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
nqp->s_rnr_timeout -= qp->s_rnr_timeout;
list_add(&qp->timerwait, l);
}
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock(&dev->pending_lock);
}
/**
@@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
goto bail;
bad_lkey:
+ memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr_id;
wc.status = IB_WC_LOC_PROT_ERR;
wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
wc.qp = &qp->ibqp;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal solicited completion event. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
ret = 0;
@@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
@@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
tail = 0;
do {
if (unlikely(tail == wq->head)) {
- spin_unlock_irqrestore(&rq->lock, flags);
ret = 0;
- goto bail;
+ goto unlock;
}
/* Make sure entry is read after head index is read. */
smp_rmb();
@@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wq->tail = tail;
ret = 1;
- qp->r_wrid_valid = 1;
+ set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
@@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
goto bail;
}
}
+unlock:
spin_unlock_irqrestore(&rq->lock, flags);
-
bail:
return ret;
}
@@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
struct ib_wc wc;
u64 sdata;
atomic64_t *maddr;
+ enum ib_wc_status send_status;
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
- if (!qp) {
- dev->n_pkt_drops++;
- return;
- }
-again:
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
- sqp->s_rnr_timeout) {
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
- }
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
- /* Get the next send request. */
- if (sqp->s_last == sqp->s_head) {
- /* Send work queue is empty. */
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
+ sqp->s_flags |= IPATH_S_BUSY;
+
+again:
+ if (sqp->s_last == sqp->s_head)
+ goto clr_busy;
+ wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work request. */
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
*/
- wqe = get_swqe_ptr(sqp, sqp->s_last);
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
spin_unlock_irqrestore(&sqp->s_lock, flags);
- wc.wc_flags = 0;
- wc.imm_data = 0;
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof wc);
+ send_status = IB_WC_SUCCESS;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
@@ -310,78 +331,36 @@ again:
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = wqe->wr.ex.imm_data;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
- if (!ipath_get_rwqe(qp, 0)) {
- rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- if (sqp->s_rnr_retry == 0) {
- wc.status = IB_WC_RNR_RETRY_EXC_ERR;
- goto err;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- dev->n_rnr_naks++;
- sqp->s_rnr_timeout =
- ib_ipath_rnr_table[qp->r_min_rnr_timer];
- ipath_insert_rnr_queue(sqp);
- goto done;
- }
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = wqe->wr.ex.imm_data;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
- IB_ACCESS_REMOTE_WRITE))) {
- acc_err:
- wc.status = IB_WC_REM_ACCESS_ERR;
- err:
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.qp = &sqp->ibqp;
- wc.src_qp = sqp->remote_qpn;
- wc.pkey_index = 0;
- wc.slid = sqp->remote_ah_attr.dlid;
- wc.sl = sqp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- spin_lock_irqsave(&sqp->s_lock, flags);
- ipath_sqerror_qp(sqp, &wc);
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
- }
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
break;
case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
@@ -394,11 +373,8 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC))) {
- wc.status = IB_WC_REM_INV_REQ_ERR;
- goto err;
- }
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.atomic.remote_addr,
wqe->wr.wr.atomic.rkey,
@@ -415,7 +391,8 @@ again:
goto send_comp;
default:
- goto done;
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
}
sge = &sqp->s_sge.sge;
@@ -448,8 +425,7 @@ again:
sqp->s_len -= len;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
- wqe->wr.opcode == IB_WR_RDMA_READ)
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -458,32 +434,89 @@ again:
wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.vendor_err = 0;
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
+ ipath_send_complete(sqp, wqe, send_status);
goto again;
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
+ }
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
+ goto clr_busy;
+ sqp->s_flags |= IPATH_S_WAITING;
+ dev->n_rnr_naks++;
+ sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
+ ipath_insert_rnr_queue(sqp);
+ goto clr_busy;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ ipath_rc_error(qp, wc.status);
+
+serr:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ ipath_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~IPATH_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
- if (atomic_dec_and_test(&qp->refcount))
+ if (qp && atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
-static void want_buffer(struct ipath_devdata *dd)
+static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
- if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) {
+ if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
+ qp->ibqp.qp_type == IB_QPT_SMI) {
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd)
* @dev: the device we ran out of buffers on
*
* Called when we run out of PIO buffers.
+ * If we are now in the error state, return zero to flush the
+ * send work request.
*/
-static void ipath_no_bufs_available(struct ipath_qp *qp,
+static int ipath_no_bufs_available(struct ipath_qp *qp,
struct ipath_ibdev *dev)
{
unsigned long flags;
+ int ret = 1;
/*
* Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
- * could be called. If we are still in the tasklet function,
- * tasklet_hi_schedule() will not call us until the next time
- * tasklet_hi_schedule() is called.
- * We leave the busy flag set so that another post send doesn't
- * try to put the same QP on the piowait list again.
+ * could be called. Therefore, put QP on the piowait list before
+ * enabling the PIO avail interrupt.
*/
- spin_lock_irqsave(&dev->pending_lock, flags);
- list_add_tail(&qp->piowait, &dev->piowait);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
- want_buffer(dev->dd);
- dev->n_piowait++;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+ dev->n_piowait++;
+ qp->s_flags |= IPATH_S_WAITING;
+ qp->s_flags &= ~IPATH_S_BUSY;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->piowait))
+ list_add_tail(&qp->piowait, &dev->piowait);
+ spin_unlock(&dev->pending_lock);
+ } else
+ ret = 0;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (ret)
+ want_buffer(dev->dd, qp);
+ return ret;
}
/**
@@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data)
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int (*make_req)(struct ipath_qp *qp);
-
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
- goto bail;
+ unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
ipath_ruc_loopback(qp);
- goto clear;
+ goto bail;
}
if (qp->ibqp.qp_type == IB_QPT_RC)
@@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data)
else
make_req = ipath_make_ud_req;
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ goto bail;
+ }
+
+ qp->s_flags |= IPATH_S_BUSY;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
again:
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
@@ -623,8 +677,8 @@ again:
*/
if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size)) {
- ipath_no_bufs_available(qp, dev);
- goto bail;
+ if (ipath_no_bufs_available(qp, dev))
+ goto bail;
}
dev->n_unicast_xmit++;
/* Record that we sent the packet and s_hdr is empty. */
@@ -633,16 +687,20 @@ again:
if (make_req(qp))
goto again;
-clear:
- clear_bit(IPATH_S_BUSY, &qp->s_busy);
+
bail:;
}
+/*
+ * This should be called with s_lock held.
+ */
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status)
{
- unsigned long flags;
- u32 last;
+ u32 old_last, last;
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ return;
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -650,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
status != IB_WC_SUCCESS) {
struct ib_wc wc;
+ memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- wc.vendor_err = 0;
- wc.byte_len = wqe->length;
- wc.imm_data = 0;
wc.qp = &qp->ibqp;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ if (status == IB_WC_SUCCESS)
+ wc.byte_len = wqe->length;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
}
- spin_lock_irqsave(&qp->s_lock, flags);
- last = qp->s_last;
+ old_last = last = qp->s_last;
if (++last >= qp->s_size)
last = 0;
qp->s_last = last;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 1974df7a9f7..284c9bca517 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -263,14 +263,10 @@ static void sdma_abort_task(unsigned long opaque)
hwstatus = ipath_read_kreg64(dd,
dd->ipath_kregs->kr_senddmastatus);
- if (/* ScoreBoardDrainInProg */
- test_bit(63, &hwstatus) ||
- /* AbortInProg */
- test_bit(62, &hwstatus) ||
- /* InternalSDmaEnable */
- test_bit(61, &hwstatus) ||
- /* ScbEmpty */
- !test_bit(30, &hwstatus)) {
+ if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
+ IPATH_SDMA_STATUS_ABORT_IN_PROG |
+ IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
+ !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
if (dd->ipath_sdma_reset_wait > 0) {
/* not done shutting down sdma */
--dd->ipath_sdma_reset_wait;
@@ -308,13 +304,15 @@ static void sdma_abort_task(unsigned long opaque)
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
/*
- * Don't restart sdma here. Wait until link is up to ACTIVE.
- * VL15 MADs used to bring the link up use PIO, and multiple
- * link transitions otherwise cause the sdma engine to be
+ * Don't restart sdma here (with the exception
+ * below). Wait until link is up to ACTIVE. VL15 MADs
+ * used to bring the link up use PIO, and multiple link
+ * transitions otherwise cause the sdma engine to be
* stopped and started multiple times.
- * The disable is done here, including the shadow, so the
- * state is kept consistent.
- * See ipath_restart_sdma() for the actual starting of sdma.
+ * The disable is done here, including the shadow,
+ * so the state is kept consistent.
+ * See ipath_restart_sdma() for the actual starting
+ * of sdma.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
@@ -326,6 +324,13 @@ static void sdma_abort_task(unsigned long opaque)
/* make sure I see next message */
dd->ipath_sdma_abort_jiffies = 0;
+ /*
+ * Not everything that takes SDMA offline is a link
+ * status change. If the link was up, restart SDMA.
+ */
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ ipath_restart_sdma(dd);
+
goto done;
}
@@ -336,7 +341,7 @@ resched:
* state change
*/
if (jiffies > dd->ipath_sdma_abort_jiffies) {
- ipath_dbg("looping with status 0x%016llx\n",
+ ipath_dbg("looping with status 0x%08lx\n",
dd->ipath_sdma_status);
dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
}
@@ -427,7 +432,12 @@ int setup_sdma(struct ipath_devdata *dd)
goto done;
}
- dd->ipath_sdma_status = 0;
+ /*
+ * Set initial status as if we had been up, then gone down.
+ * This lets initial start on transition to ACTIVE be the
+ * same as restart after link flap.
+ */
+ dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
dd->ipath_sdma_abort_jiffies = 0;
dd->ipath_sdma_generation = 0;
dd->ipath_sdma_descq_tail = 0;
@@ -449,16 +459,19 @@ int setup_sdma(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
dd->ipath_sdma_head_phys);
- /* Reserve all the former "kernel" piobufs */
- n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved;
- for (i = dd->ipath_lastport_piobuf; i < n; ++i) {
+ /*
+ * Reserve all the former "kernel" piobufs, using high number range
+ * so we get as many 4K buffers as possible
+ */
+ n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
+ ipath_chg_pioavailkernel(dd, i, n - i , 0);
+ for (; i < n; ++i) {
unsigned word = i / 64;
unsigned bit = i & 63;
BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
- ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
- n - dd->ipath_lastport_piobuf, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
senddmabufmask[0]);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
@@ -598,7 +611,7 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
}
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (!needed) {
- ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n",
+ ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
dd->ipath_sdma_status);
goto bail;
}
@@ -615,6 +628,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ /* notify upper layers */
+ ipath_ib_piobufavail(dd->verbs_dev);
+
bail:
return;
}
@@ -682,7 +698,7 @@ retry:
addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
tx->map_len, DMA_TO_DEVICE);
- if (dma_mapping_error(addr)) {
+ if (dma_mapping_error(&dd->pcidev->dev, addr)) {
ret = -EIO;
goto unlock;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index bfe8926b551..82cc588b8bf 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
struct ipath_swqe *wqe;
+ unsigned long flags;
u32 hwords;
u32 bth0;
u32 len;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
int ret = 0;
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
+ }
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp)
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
/* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head)
- goto done;
+ goto bail;
/*
* Start a new request.
*/
@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
break;
default:
- goto done;
+ goto bail;
}
break;
@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)
ipath_make_ruc_header(to_idev(qp->ibqp.device),
qp, ohdr, bth0 | (qp->s_state << 24),
qp->s_next_psn++ & IPATH_PSN_MASK);
+done:
ret = 1;
+ goto unlock;
-done:
+bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- wc.imm_data = 0;
- wc.wc_flags = 0;
+ memset(&wc, 0, sizeof wc);
/* Compare the PSN verses the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
- if (qp->r_reuse_sge) {
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE) {
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
@@ -356,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
+ wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
- wc.imm_data = ohdr->u.imm_data;
+ wc.ex.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
@@ -380,24 +403,19 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
- /* XXX Need to free SGEs */
+ wc.opcode = IB_WC_RECV;
last_imm:
ipath_copy_sge(&qp->r_sge, data, tlen);
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
- wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
@@ -465,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
rdma_last_imm:
if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
+ wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
- wc.imm_data = ohdr->u.imm_data;
+ wc.ex.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
@@ -488,13 +506,14 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
dev->n_pkt_drops++;
goto done;
}
- if (qp->r_reuse_sge)
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++;
goto done;
}
wc.byte_len = qp->r_len;
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
goto last_imm;
case OP(RDMA_WRITE_LAST):
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 8b6a261c89e..36aa242c487 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
u32 length;
qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
dev->n_pkt_drops++;
- goto send_comp;
+ goto done;
+ }
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ /*
+ * The destination QP was found but cannot receive. The
+ * normal exit path (drop:) releases a reference on qp that
+ * the done: label skips, so release it here before
+ * dropping the packet.
+ */
+ dev->n_pkt_drops++;
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ goto done;
}
rsge.sg_list = NULL;
@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
* present on the wire.
*/
length = swqe->length;
+ memset(&wc, 0, sizeof wc);
wc.byte_len = length + sizeof(struct ib_grh);
if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM;
- wc.imm_data = swqe->wr.ex.imm_data;
- } else {
- wc.wc_flags = 0;
- wc.imm_data = 0;
+ wc.ex.imm_data = swqe->wr.ex.imm_data;
}
/*
@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
}
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
- wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
@@ -248,8 +245,7 @@ drop:
kfree(rsge.sg_list);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
-send_comp:
- ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
+done:;
}
/**
@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
struct ipath_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct ipath_swqe *wqe;
+ unsigned long flags;
u32 nwords;
u32 extra_bytes;
u32 bth0;
@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)
u16 lid;
int ret = 0;
- if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)))
- goto bail;
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
if (qp->s_cur == qp->s_head)
goto bail;
wqe = get_swqe_ptr(qp, qp->s_cur);
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
/* Construct the header. */
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)
dev->n_unicast_xmit++;
} else {
dev->n_unicast_xmit++;
- lid = ah_attr->dlid &
- ~((1 << dev->dd->ipath_lmc) - 1);
+ lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
if (unlikely(lid == dev->dd->ipath_lid)) {
+ /*
+ * If DMAs are in progress, we can't generate
+ * a completion for the loopback packet since
+ * it would be out of order.
+ * XXX Instead of waiting, we could queue a
+ * zero length descriptor so we get a callback.
+ */
+ if (atomic_read(&qp->s_dma_busy)) {
+ /*
+ * s_cur was already advanced past this WQE
+ * above. Back it up (with wrap) so the
+ * request is picked up again once the DMA
+ * queue drains instead of being lost.
+ */
+ qp->s_cur = qp->s_cur ? qp->s_cur - 1 :
+ qp->s_size - 1;
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ /* s_lock is dropped around the loopback delivery. */
+ spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_ud_loopback(qp, wqe);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
- if (++qp->s_cur >= qp->s_size)
- qp->s_cur = 0;
ret = 1;
+ goto unlock;
bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
@@ -463,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
if (header_in_data) {
- wc.imm_data = *(__be32 *) data;
+ wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else
- wc.imm_data = ohdr->u.ud.imm_data;
+ wc.ex.imm_data = ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
hdrsize += sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
- wc.imm_data = 0;
+ wc.ex.imm_data = 0;
wc.wc_flags = 0;
} else {
dev->n_pkt_drops++;
@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/*
* Get the next work request entry to find where to put the data.
*/
- if (qp->r_reuse_sge)
- qp->r_reuse_sge = 0;
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 0)) {
/*
* Count VL15 packets dropped due to no receive buffer.
@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
/* Silently drop packets which are too big. */
if (wc.byte_len > qp->r_len) {
- qp->r_reuse_sge = 1;
+ qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto bail;
}
@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
- qp->r_wrid_valid = 0;
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
+ goto bail;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index 86e016916cd..82d9a0b5ca2 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
DMA_TO_DEVICE);
- if (dma_mapping_error(dma_addr)) {
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto free_unmap;
}
@@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
pages[j], 0, flen, DMA_TO_DEVICE);
unsigned long fofs = addr & ~PAGE_MASK;
- if (dma_mapping_error(dma_addr)) {
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto done;
}
@@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
if (page) {
dma_addr = dma_map_page(&dd->pcidev->dev,
page, 0, len, DMA_TO_DEVICE);
- if (dma_mapping_error(dma_addr)) {
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto free_pbc;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
index e70946c1428..fc76316c4a5 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
-int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
- u32 counter);
void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index e63927cce5b..55c71882882 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -35,6 +35,7 @@
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/utsname.h>
+#include <linux/rculist.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -111,16 +112,24 @@ static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; ipath_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = 0,
[IB_QPS_INIT] = IPATH_POST_RECV_OK,
[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
- IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
+ IPATH_PROCESS_NEXT_SEND_OK,
[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
- IPATH_POST_SEND_OK,
- [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
- [IB_QPS_ERR] = 0,
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
+ [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
};
struct ipath_ucontext {
@@ -230,18 +239,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
}
}
-static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wr->wr_id;
- wc.status = IB_WC_WR_FLUSH_ERR;
- wc.opcode = ib_ipath_wc_opcode[wr->opcode];
- wc.qp = &qp->ibqp;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
-}
-
/*
* Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the ipath_sge_state to get the count.
@@ -347,14 +344,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
spin_lock_irqsave(&qp->s_lock, flags);
/* Check that state is OK to post send. */
- if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) {
- if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR)
- goto bail_inval;
- /* C10-96 says generate a flushed completion entry. */
- ipath_flush_wqe(qp, wr);
- ret = 0;
- goto bail;
- }
+ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
+ goto bail_inval;
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge)
@@ -396,7 +387,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
wqe = get_swqe_ptr(qp, qp->s_head);
wqe->wr = *wr;
- wqe->ssn = qp->s_ssn++;
wqe->length = 0;
if (wr->num_sge) {
acc = wr->opcode >= IB_WR_RDMA_READ ?
@@ -422,6 +412,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
goto bail_inval;
} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
goto bail_inval;
+ wqe->ssn = qp->s_ssn++;
qp->s_head = next;
ret = 0;
@@ -677,6 +668,7 @@ bail:;
static void ipath_ib_timer(struct ipath_ibdev *dev)
{
struct ipath_qp *resend = NULL;
+ struct ipath_qp *rnr = NULL;
struct list_head *last;
struct ipath_qp *qp;
unsigned long flags;
@@ -703,7 +695,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
if (--qp->s_rnr_timeout == 0) {
do {
list_del_init(&qp->timerwait);
- tasklet_hi_schedule(&qp->s_task);
+ qp->timer_next = rnr;
+ rnr = qp;
+ atomic_inc(&qp->refcount);
if (list_empty(last))
break;
qp = list_entry(last->next, struct ipath_qp,
@@ -743,13 +737,15 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
spin_unlock_irqrestore(&dev->pending_lock, flags);
/* XXX What if timer fires again while this is running? */
- for (qp = resend; qp != NULL; qp = qp->timer_next) {
- struct ib_wc wc;
+ while (resend != NULL) {
+ qp = resend;
+ resend = qp->timer_next;
spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
+ if (qp->s_last != qp->s_tail &&
+ ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
dev->n_timeouts++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -757,6 +753,19 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
+ while (rnr != NULL) {
+ qp = rnr;
+ rnr = qp->timer_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
}
static void update_sge(struct ipath_sge_state *ss, u32 length)
@@ -1012,13 +1021,24 @@ static void sdma_complete(void *cookie, int status)
struct ipath_verbs_txreq *tx = cookie;
struct ipath_qp *qp = tx->qp;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ /* spin_lock_irqsave() saves the IRQ state in an unsigned long */
+ unsigned long flags;
+ enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
+ IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
- /* Generate a completion queue entry if needed */
- if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) {
- enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
- IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
-
+ /*
+ * When the QP's count of in-flight send DMAs drops to zero,
+ * complete the WQE (if any), reschedule the send tasklet if a
+ * flush or a DMA wait is pending, and wake wait_dma sleepers.
+ * Otherwise just complete the WQE under s_lock.
+ */
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (tx->wqe)
+ ipath_send_complete(qp, tx->wqe, ibs);
+ if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+ qp->s_last != qp->s_head) ||
+ (qp->s_flags & IPATH_S_WAIT_DMA))
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ wake_up(&qp->wait_dma);
+ } else if (tx->wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, tx->wqe, ibs);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
@@ -1029,6 +1049,21 @@ static void sdma_complete(void *cookie, int status)
wake_up(&qp->wait);
}
+/*
+ * Drop one count from the QP's number of in-flight send DMA requests.
+ * When the count reaches zero, reschedule the send tasklet if work was
+ * deferred (WQEs still queued while in a flush state, or
+ * IPATH_S_WAIT_DMA set) and wake anyone sleeping on qp->wait_dma.
+ */
+static void decrement_dma_busy(struct ipath_qp *qp)
+{
+	/* spin_lock_irqsave() saves the IRQ state in an unsigned long */
+	unsigned long flags;
+
+	if (atomic_dec_and_test(&qp->s_dma_busy)) {
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+		     qp->s_last != qp->s_head) ||
+		    (qp->s_flags & IPATH_S_WAIT_DMA))
+			ipath_schedule_send(qp);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		wake_up(&qp->wait_dma);
+	}
+}
+
/*
* Compute the number of clock cycles of delay before sending the next packet.
* The multipliers reflect the number of clocks for the fastest rate so
@@ -1067,9 +1102,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
if (tx) {
qp->s_tx = NULL;
/* resend previously constructed packet */
+ atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
- if (ret)
+ if (ret) {
qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
goto bail;
}
@@ -1120,12 +1158,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
tx->txreq.sg_count = ndesc;
tx->map_len = (hdrwords + 2) << 2;
tx->txreq.map_addr = &tx->hdr;
+ atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
if (ret) {
/* save ss and length in dwords */
tx->ss = ss;
tx->len = dwords;
qp->s_tx = tx;
+ decrement_dma_busy(qp);
}
goto bail;
}
@@ -1146,6 +1186,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
memcpy(piobuf, hdr, hdrwords << 2);
ipath_copy_from_sge(piobuf + hdrwords, ss, len);
+ atomic_inc(&qp->s_dma_busy);
ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
/*
* If we couldn't queue the DMA request, save the info
@@ -1156,6 +1197,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
tx->ss = NULL;
tx->len = 0;
qp->s_tx = tx;
+ decrement_dma_busy(qp);
}
dev->n_unaligned++;
goto bail;
@@ -1179,6 +1221,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
unsigned flush_wc;
u32 control;
int ret;
+ /* spin_lock_irqsave() saves the IRQ state in an unsigned long */
+ unsigned long flags;
piobuf = ipath_getpiobuf(dd, plen, NULL);
if (unlikely(piobuf == NULL)) {
@@ -1249,8 +1292,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
}
copy_io(piobuf, ss, len, flush_wc);
done:
- if (qp->s_wqe)
+ if (qp->s_wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
ret = 0;
bail:
return ret;
@@ -1283,19 +1329,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
* can defer SDMA restart until link goes ACTIVE without
* worrying about just how we got there.
*/
- if (qp->ibqp.qp_type == IB_QPT_SMI)
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
plen, dwords);
- /* All non-VL15 packets are dropped if link is not ACTIVE */
- else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
- if (qp->s_wqe)
- ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
- ret = 0;
- } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
- ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
- plen, dwords);
else
- ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+ ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
plen, dwords);
return ret;
@@ -1403,27 +1442,46 @@ bail:
* This is called from ipath_intr() at interrupt level when a PIO buffer is
* available after ipath_verbs_send() returned an error that no buffers were
* available. Return 1 if we consumed all the PIO buffers and we still have
- * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
+ * QPs waiting for buffers (for now, just restart the send tasklet and
* return zero).
*/
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
+ struct list_head *list;
+ struct ipath_qp *qplist;
struct ipath_qp *qp;
unsigned long flags;
if (dev == NULL)
goto bail;
+ list = &dev->piowait;
+ qplist = NULL;
+
spin_lock_irqsave(&dev->pending_lock, flags);
- while (!list_empty(&dev->piowait)) {
- qp = list_entry(dev->piowait.next, struct ipath_qp,
- piowait);
+ while (!list_empty(list)) {
+ qp = list_entry(list->next, struct ipath_qp, piowait);
list_del_init(&qp->piowait);
- clear_bit(IPATH_S_BUSY, &qp->s_busy);
- tasklet_hi_schedule(&qp->s_task);
+ qp->pio_next = qplist;
+ qplist = qp;
+ atomic_inc(&qp->refcount);
}
spin_unlock_irqrestore(&dev->pending_lock, flags);
+ while (qplist != NULL) {
+ qp = qplist;
+ qplist = qp->pio_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+
bail:
return 0;
}
@@ -1437,9 +1495,11 @@ static int ipath_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
- IB_DEVICE_SYS_IMAGE_GUID;
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
props->page_size_cap = PAGE_SIZE;
- props->vendor_id = dev->dd->ipath_vendorid;
+ props->vendor_id =
+ IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
props->vendor_part_id = dev->dd->ipath_deviceid;
props->hw_ver = dev->dd->ipath_pcirev;
@@ -2145,11 +2205,12 @@ bail:
void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
struct ib_device *ibdev = &dev->ibdev;
-
- disable_timer(dev->dd);
+ u32 qps_inuse;
ib_unregister_device(ibdev);
+ disable_timer(dev->dd);
+
if (!list_empty(&dev->pending[0]) ||
!list_empty(&dev->pending[1]) ||
!list_empty(&dev->pending[2]))
@@ -2164,7 +2225,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
* Note that ipath_unregister_ib_device() can be called before all
* the QPs are destroyed!
*/
- ipath_free_all_qps(&dev->qp_table);
+ qps_inuse = ipath_free_all_qps(&dev->qp_table);
+ if (qps_inuse)
+ ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
+ qps_inuse);
kfree(dev->qp_table.table);
kfree(dev->lk_table.table);
kfree(dev->txreq_bufs);
@@ -2215,17 +2279,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr,
"RC OTH NAKs %d\n"
"RC timeouts %d\n"
"RC RDMA dup %d\n"
- "RC stalls %d\n"
"piobuf wait %d\n"
- "no piobuf %d\n"
"unaligned %d\n"
"PKT drops %d\n"
"WQE errs %d\n",
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts,
- dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
- dev->n_no_piobuf, dev->n_unaligned,
+ dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i];
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 6514aa8306c..9d12ae8a778 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -74,6 +74,11 @@
#define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04
#define IPATH_PROCESS_SEND_OK 0x08
+#define IPATH_PROCESS_NEXT_SEND_OK 0x10
+#define IPATH_FLUSH_SEND 0x20
+#define IPATH_FLUSH_RECV 0x40
+#define IPATH_PROCESS_OR_FLUSH_SEND \
+ (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
@@ -353,12 +358,14 @@ struct ipath_qp {
struct ib_qp ibqp;
struct ipath_qp *next; /* link list for QPN hash table */
struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
+ struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
struct list_head piowait; /* link for wait PIO buf */
struct list_head timerwait; /* link for waiting for timeouts */
struct ib_ah_attr remote_ah_attr;
struct ipath_ib_header s_hdr; /* next packet header to send */
atomic_t refcount;
wait_queue_head_t wait;
+ wait_queue_head_t wait_dma;
struct tasklet_struct s_task;
struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
@@ -369,7 +376,7 @@ struct ipath_qp {
struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
- unsigned long s_busy;
+ atomic_t s_dma_busy;
u16 s_pkt_delay;
u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
@@ -383,6 +390,7 @@ struct ipath_qp {
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
+ unsigned long r_aflags;
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
@@ -394,8 +402,7 @@ struct ipath_qp {
u8 r_state; /* opcode of last packet received */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
- u8 r_reuse_sge; /* for UC receive errors */
- u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
+ u8 r_flags;
u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags;
@@ -404,13 +411,13 @@ struct ipath_qp {
u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
- u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
u8 s_flags;
u8 s_dmult;
+ u8 s_draining;
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
@@ -428,16 +435,40 @@ struct ipath_qp {
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
-/* Bit definition for s_busy. */
-#define IPATH_S_BUSY 0
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define IPATH_R_WRID_VALID 0
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define IPATH_R_REUSE_SGE 0x01
+#define IPATH_R_RDMAR_SEQ 0x02
/*
* Bit definitions for s_flags.
+ *
+ * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
+ * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA.
*/
#define IPATH_S_SIGNAL_REQ_WR 0x01
#define IPATH_S_FENCE_PENDING 0x02
#define IPATH_S_RDMAR_PENDING 0x04
#define IPATH_S_ACK_PENDING 0x08
+#define IPATH_S_BUSY 0x10
+#define IPATH_S_WAITING 0x20
+#define IPATH_S_WAIT_SSN_CREDIT 0x40
+#define IPATH_S_WAIT_DMA 0x80
+
+#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
+ IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
#define IPATH_PSN_CREDIT 512
@@ -573,13 +604,11 @@ struct ipath_ibdev {
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
- u32 n_rc_stalls;
u32 n_pkt_drops;
u32 n_vl15_dropped;
u32 n_wqe_errs;
u32 n_rdma_dup_busy;
u32 n_piowait;
- u32 n_no_piobuf;
u32 n_unaligned;
u32 port_cap_flags;
u32 pma_sample_start;
@@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
return container_of(ibdev, struct ipath_ibdev, ibdev);
}
+/*
+ * Clear any IPATH_S_ANY_WAIT bits in qp->s_flags and schedule the QP's
+ * send tasklet (qp->s_task) unless IPATH_S_BUSY is already set.
+ *
+ * This must be called with s_lock held.
+ */
+static inline void ipath_schedule_send(struct ipath_qp *qp)
+{
+ if (qp->s_flags & IPATH_S_ANY_WAIT)
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ if (!(qp->s_flags & IPATH_S_BUSY))
+ tasklet_hi_schedule(&qp->s_task);
+}
+
int ipath_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
@@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
-void ipath_free_all_qps(struct ipath_qp_table *qpt);
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
-void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
-
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
@@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
-void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc);
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
+
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 9e5abf9c309..d73e3223287 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -31,8 +31,7 @@
* SOFTWARE.
*/
-#include <linux/list.h>
-#include <linux/rcupdate.h>
+#include <linux/rculist.h>
#include "ipath_verbs.h"
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 2f199c5c4a7..a1464574bfd 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -246,7 +247,7 @@ err_mtt:
if (context)
ib_umem_release(cq->umem);
else
- mlx4_ib_free_cq_buf(dev, &cq->buf, entries);
+ mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
if (!context)
@@ -434,7 +435,7 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq)
mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
ib_umem_release(mcq->umem);
} else {
- mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1);
+ mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
mlx4_db_free(dev->dev, &mcq->db);
}
@@ -637,6 +638,7 @@ repoll:
case MLX4_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
case MLX4_OPCODE_SEND:
+ case MLX4_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
break;
case MLX4_OPCODE_RDMA_READ:
@@ -657,24 +659,35 @@ repoll:
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
+ case MLX4_OPCODE_FMR:
+ wc->opcode = IB_WC_FAST_REG_MR;
+ break;
+ case MLX4_OPCODE_LOCAL_INVAL:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
}
} else {
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
- wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->imm_data = cqe->immed_rss_invalid;
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->immed_rss_invalid;
+ break;
+ case MLX4_RECV_OPCODE_SEND_INVAL:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
break;
case MLX4_RECV_OPCODE_SEND:
wc->opcode = IB_WC_RECV;
wc->wc_flags = 0;
break;
case MLX4_RECV_OPCODE_SEND_IMM:
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->imm_data = cqe->immed_rss_invalid;
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->immed_rss_invalid;
break;
}
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 4c1e72fc8f5..cdca3a511e1 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -255,7 +255,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS;
} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
- in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) {
+ in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
return IB_MAD_RESULT_SUCCESS;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4d61e32866c..a3c2851c054 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -90,7 +91,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_RC_RNR_NAK_GEN;
+ IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -103,6 +105,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (dev->dev->caps.max_gso_sz)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
+ props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+ if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
+ (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
+ (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
@@ -126,6 +134,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
+ props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64);
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
@@ -437,7 +446,9 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
- &to_mqp(ibqp)->mqp, gid->raw);
+ &to_mqp(ibqp)->mqp, gid->raw,
+ !!(to_mqp(ibqp)->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
}
static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
@@ -562,6 +573,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports;
ibdev->ib_dev.num_comp_vectors = 1;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
@@ -624,6 +636,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
+ ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+ ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
+ ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 5cf994794d2..6e2b0dc21b6 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -83,6 +84,11 @@ struct mlx4_ib_mr {
struct ib_umem *umem;
};
+struct mlx4_ib_fast_reg_page_list { /* mlx4 wrapper around ib_fast_reg_page_list */
+ struct ib_fast_reg_page_list ibfrpl; /* embedded object; to_mfrpl() maps it back to this struct */
+ dma_addr_t map; /* DMA handle from dma_alloc_coherent() for ibfrpl.page_list */
+};
+
struct mlx4_ib_fmr {
struct ib_fmr ibfmr;
struct mlx4_fmr mfmr;
@@ -101,7 +107,8 @@ struct mlx4_ib_wq {
};
enum mlx4_ib_qp_flags {
- MLX4_IB_QP_LSO = 1 << 0
+ MLX4_IB_QP_LSO = 1 << 0,
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
};
struct mlx4_ib_qp {
@@ -198,6 +205,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx4_ib_mr, ibmr);
}
+static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{ /* Map an embedded ibfrpl member back to its enclosing mlx4_ib_fast_reg_page_list. */
+ return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
+}
+
static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -238,6 +250,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len);
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len);
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 68e92485fc7..a4cdb465cd1 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -183,6 +184,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_mr *mr;
+ int err;
+ /*
+ * Allocate an mlx4_ib_mr wrapper, set up its mmr with mlx4_mr_alloc()
+ * (passing the PD number and max_page_list_len), enable it with
+ * mlx4_mr_enable(), and return the embedded ib_mr. On failure the
+ * partially built state is released and an ERR_PTR() is returned.
+ */
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * NOTE(review): mr comes from kmalloc() (not zeroed) and only mr->mmr
+ * is filled in below; nothing else in *mr (e.g. mr->umem) is assigned
+ * in this function -- confirm those fields are initialised before use.
+ */
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
+ max_page_list_len, 0, &mr->mmr);
+ if (err)
+ goto err_free;
+
+ err = mlx4_mr_enable(dev->dev, &mr->mmr);
+ if (err)
+ goto err_mr;
+
+ return &mr->ibmr;
+ /* Error unwinding: err_mr falls through into err_free. */
+err_mr:
+ mlx4_mr_free(dev->dev, &mr->mmr);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+/*
+ * Allocate a fast-register page list with room for page_list_len u64
+ * entries.
+ *
+ * The list buffer comes from dma_alloc_coherent() and its DMA handle is
+ * kept in mfrpl->map. A request whose byte size exceeds PAGE_SIZE is
+ * rejected with -EINVAL; either allocation failing yields -ENOMEM. On
+ * success the embedded ib_fast_reg_page_list is returned.
+ *
+ * NOTE(review): size is an int computed from page_list_len * sizeof (u64);
+ * a very large page_list_len could wrap before the PAGE_SIZE check --
+ * confirm callers bound it.
+ */
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_fast_reg_page_list *mfrpl;
+ int size = page_list_len * sizeof (u64);
+
+ if (size > PAGE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
+ if (!mfrpl)
+ return ERR_PTR(-ENOMEM);
+
+ mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+ size, &mfrpl->map,
+ GFP_KERNEL);
+ if (!mfrpl->ibfrpl.page_list)
+ goto err_free;
+
+ WARN_ON(mfrpl->map & 0x3f); /* warn if the DMA address is not 64-byte aligned */
+
+ return &mfrpl->ibfrpl;
+
+err_free:
+ kfree(mfrpl);
+ return ERR_PTR(-ENOMEM);
+}
+/*
+ * Release a fast-register page list: free the DMA-coherent list buffer
+ * (using the handle saved in mfrpl->map), then the wrapper structure.
+ *
+ * The buffer size is recomputed from page_list->max_page_list_len.
+ * NOTE(review): neither that field nor page_list->device is assigned by
+ * mlx4_ib_alloc_fast_reg_page_list() -- presumably the caller of the
+ * alloc hook fills them in; confirm.
+ */
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+ struct mlx4_ib_dev *dev = to_mdev(page_list->device);
+ struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+ int size = page_list->max_page_list_len * sizeof (u64);
+
+ dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list,
+ mfrpl->map);
+ kfree(mfrpl);
+}
+
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
struct ib_fmr_attr *fmr_attr)
{
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8e02ecfec18..f7bc7dd8578 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -78,6 +79,9 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
[IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
[IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+ [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+ [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+ [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -129,9 +133,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
int ind;
void *buf;
__be32 stamp;
+ struct mlx4_wqe_ctrl_seg *ctrl;
- s = roundup(size, 1U << qp->sq.wqe_shift);
if (qp->sq_max_wqes_per_wr > 1) {
+ s = roundup(size, 1U << qp->sq.wqe_shift);
for (i = 0; i < s; i += 64) {
ind = (i >> qp->sq.wqe_shift) + n;
stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
@@ -141,7 +146,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
*wqe = stamp;
}
} else {
- buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+ ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+ s = (ctrl->fence_size & 0x3f) << 4;
for (i = 64; i < s; i += 64) {
wqe = buf + i;
*wqe = cpu_to_be32(0xffffffff);
@@ -333,6 +339,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
send_wqe_overhead(type, qp->flags);
+ if (s > dev->dev->caps.max_sq_desc_sz)
+ return -EINVAL;
+
/*
* Hermon supports shrinking WQEs, such that a single work
* request can include multiple units of 1 << wqe_shift. This
@@ -372,9 +381,6 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
for (;;) {
- if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz)
- return -EINVAL;
-
qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
/*
@@ -395,7 +401,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
++qp->sq.wqe_shift;
}
- qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) -
+ qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
+ (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
send_wqe_overhead(type, qp->flags)) /
sizeof (struct mlx4_wqe_data_seg);
@@ -411,7 +418,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
cap->max_send_wr = qp->sq.max_post =
(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
- cap->max_send_sge = qp->sq.max_gs;
+ cap->max_send_sge = min(qp->sq.max_gs,
+ min(dev->dev->caps.max_sq_sg,
+ dev->dev->caps.max_rq_sg));
/* We don't support inline sends for kernel QPs (yet) */
cap->max_inline_data = 0;
@@ -449,19 +458,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->rq.lock);
qp->state = IB_QPS_RESET;
- qp->atomic_rd_en = 0;
- qp->resp_depth = 0;
-
- qp->rq.head = 0;
- qp->rq.tail = 0;
- qp->sq.head = 0;
- qp->sq.tail = 0;
- qp->sq_next_wqe = 0;
-
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- else
- qp->sq_signal_bits = 0;
err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
if (err)
@@ -506,6 +504,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
} else {
qp->sq_no_prefetch = 0;
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
@@ -679,10 +680,15 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct mlx4_ib_qp *qp;
int err;
- /* We only support LSO, and only for kernel UD QPs. */
- if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO)
+ /*
+ * We only support LSO and multicast loopback blocking, and
+ * only for kernel UD QPs.
+ */
+ if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
return ERR_PTR(-EINVAL);
- if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO &&
+
+ if (init_attr->create_flags &&
(pd->uobject || init_attr->qp_type != IB_QPT_UD))
return ERR_PTR(-EINVAL);
@@ -691,7 +697,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_UC:
case IB_QPT_UD:
{
- qp = kmalloc(sizeof *qp, GFP_KERNEL);
+ qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
@@ -712,7 +718,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
if (pd->uobject)
return ERR_PTR(-EINVAL);
- sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
+ sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
if (!sqp)
return ERR_PTR(-ENOMEM);
@@ -903,7 +909,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
attr->path_mtu);
goto out;
}
- context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+ context->mtu_msgmax = (attr->path_mtu << 5) |
+ ilog2(dev->dev->caps.max_msg_sz);
}
if (qp->rq.wqe_cnt)
@@ -973,6 +980,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+ /* Set "fast registration enabled" for all kernel QPs */
+ if (!qp->ibqp.uobject)
+ context->params1 |= cpu_to_be32(1 << 11);
+
if (attr_mask & IB_QP_RNR_RETRY) {
context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
@@ -1060,6 +1071,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
for (i = 0; i < qp->sq.wqe_cnt; ++i) {
ctrl = get_send_wqe(qp, i);
ctrl->owner_opcode = cpu_to_be32(1 << 31);
+ if (qp->sq_max_wqes_per_wr == 1)
+ ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
}
@@ -1124,23 +1137,6 @@ out:
return err;
}
-static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
-static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
- [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_QKEY),
- [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
-};
-
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -1183,15 +1179,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
- err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
- mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
- IB_QPS_RESET, IB_QPS_INIT);
- if (err)
- goto out;
- cur_state = IB_QPS_INIT;
- }
-
err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
out:
@@ -1343,6 +1330,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
return cur + nreq >= wq->max_post;
}
+static __be32 convert_access(int acc)
+{ /* Translate IB_ACCESS_* bits in acc into big-endian MLX4_WQE_FMR_PERM_* bits. */
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
+ cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); /* local read is always included */
+}
+/*
+ * Fill in the fast-register (FMR) WQE segment from the fast_reg fields of
+ * a send work request. Multi-byte values are stored big-endian.
+ */
+static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+{
+ struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+
+ fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
+ fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
+ fseg->buf_list = cpu_to_be64(mfrpl->map); /* DMA address of the page list buffer */
+ fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+ fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
+ fseg->offset = 0; /* XXX -- is this just for ZBVA? */
+ fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); /* page_shift is stored as-is */
+ fseg->reserved[0] = 0;
+ fseg->reserved[1] = 0;
+}
+/*
+ * Fill in a local-invalidate WQE segment for the given rkey: mem_key is
+ * stored big-endian and the remaining fields are zeroed.
+ */
+static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
+{
+ iseg->flags = 0;
+ iseg->mem_key = cpu_to_be32(rkey);
+ iseg->guest_id = 0;
+ iseg->pa = 0;
+}
+
static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
u64 remote_addr, u32 rkey)
{
@@ -1416,7 +1435,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
@@ -1444,6 +1463,21 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
return 0;
}
+static __be32 send_ieth(struct ib_send_wr *wr)
+{ /* Pick the value for the control segment's imm field based on the WR opcode. */
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return wr->ex.imm_data; /* passed through without byte swapping */
+ /* The invalidate rkey is converted to big-endian here. */
+ case IB_WR_SEND_WITH_INV:
+ return cpu_to_be32(wr->ex.invalidate_rkey);
+ /* Every other opcode gets a zero field. */
+ default:
+ return 0;
+ }
+}
+
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -1457,7 +1491,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
unsigned ind;
int uninitialized_var(stamp);
int uninitialized_var(size);
- unsigned seglen;
+ unsigned uninitialized_var(seglen);
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1490,11 +1524,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
qp->sq_signal_bits;
- if (wr->opcode == IB_WR_SEND_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ctrl->imm = wr->ex.imm_data;
- else
- ctrl->imm = 0;
+ ctrl->imm = send_ieth(wr);
wqe += sizeof *ctrl;
size = sizeof *ctrl / 16;
@@ -1526,6 +1556,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
break;
+ case IB_WR_LOCAL_INV:
+ set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
+ wqe += sizeof (struct mlx4_wqe_local_inval_seg);
+ size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
+ break;
+
+ case IB_WR_FAST_REG_MR:
+ set_fmr_seg(wqe, wr);
+ wqe += sizeof (struct mlx4_wqe_fmr_seg);
+ size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+ break;
+
default:
/* No extra segments required for sends */
break;
@@ -1862,6 +1904,13 @@ done:
qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->create_flags = 0;
+ if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+ if (qp->flags & MLX4_IB_QP_LSO)
+ qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 12d6bc6f800..d42565258fb 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index e2d11be4525..13beedeeef9 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index a7630670961..c5ccc2daab6 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/errno.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 4b111a852ff..32f6c631545 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -29,8 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/string.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index e948158a28d..cc440f90000 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
#include <linux/jiffies.h>
@@ -128,7 +126,6 @@ static void handle_catas(struct mthca_dev *dev)
static void poll_catas(unsigned long dev_ptr)
{
struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
- unsigned long flags;
int i;
for (i = 0; i < dev->catas_err.size; ++i)
@@ -137,13 +134,8 @@ static void poll_catas(unsigned long dev_ptr)
return;
}
- spin_lock_irqsave(&catas_lock, flags);
- if (!dev->catas_err.stop)
- mod_timer(&dev->catas_err.timer,
- jiffies + MTHCA_CATAS_POLL_INTERVAL);
- spin_unlock_irqrestore(&catas_lock, flags);
-
- return;
+ mod_timer(&dev->catas_err.timer,
+ round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
}
void mthca_start_catas_poll(struct mthca_dev *dev)
@@ -151,7 +143,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
unsigned long addr;
init_timer(&dev->catas_err.timer);
- dev->catas_err.stop = 0;
dev->catas_err.map = NULL;
addr = pci_resource_start(dev->pdev, 0) +
@@ -182,10 +173,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
void mthca_stop_catas_poll(struct mthca_dev *dev)
{
- spin_lock_irq(&catas_lock);
- dev->catas_err.stop = 1;
- spin_unlock_irq(&catas_lock);
-
del_timer_sync(&dev->catas_err.timer);
if (dev->catas_err.map) {
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 54d230ee7d6..c33e1c53c79 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/completion.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 8928ca4a932..6efd3265f24 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_CMD_H
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
index afa56bfaab2..75671f75cac 100644
--- a/drivers/infiniband/hw/mthca/mthca_config_reg.h
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -29,8 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_CONFIG_REG_H
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 20401d2ba6b..d9f4735c2b3 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -32,8 +32,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $
*/
#include <linux/hardirq.h>
@@ -622,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
- entry->imm_data = cqe->imm_etype_pkey_eec;
+ entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV;
break;
case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
entry->wc_flags = IB_WC_WITH_IMM;
- entry->imm_data = cqe->imm_etype_pkey_eec;
+ entry->ex.imm_data = cqe->imm_etype_pkey_eec;
entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
break;
default:
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7bc32f8e377..252590116df 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -32,8 +32,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_DEV_H
@@ -204,6 +202,7 @@ struct mthca_pd_table {
struct mthca_buddy {
unsigned long **bits;
+ int *num_free;
int max_order;
spinlock_t lock;
};
@@ -279,7 +278,6 @@ struct mthca_mcg_table {
struct mthca_catas_err {
u64 addr;
u32 __iomem *map;
- unsigned long stop;
u32 size;
struct timer_list timer;
struct list_head list;
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
index b374dc395be..14f51ef97d7 100644
--- a/drivers/infiniband/hw/mthca/mthca_doorbell.h
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/types.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8bde7f98e58..cc6858f0b65 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -29,8 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $
*/
#include <linux/errno.h>
@@ -782,7 +780,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
return -ENOMEM;
dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
+ if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
__free_page(dev->eq_table.icm_page);
return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 8b7e83e6e88..640449582ab 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/string.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 9ebadd6e0cf..fb9f91b60f3 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $
*/
#include <linux/module.h>
@@ -45,6 +43,7 @@
#include "mthca_cmd.h"
#include "mthca_profile.h"
#include "mthca_memfree.h"
+#include "mthca_wqe.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
@@ -200,7 +199,18 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
mdev->limits.gid_table_len = dev_lim->max_gids;
mdev->limits.pkey_table_len = dev_lim->max_pkeys;
mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
- mdev->limits.max_sg = dev_lim->max_sg;
+ /*
+ * Need to allow for worst case send WQE overhead and check
+ * whether max_desc_sz imposes a lower limit than max_sg; UD
+ * send has the biggest overhead.
+ */
+ mdev->limits.max_sg = min_t(int, dev_lim->max_sg,
+ (dev_lim->max_desc_sz -
+ sizeof (struct mthca_next_seg) -
+ (mthca_is_memfree(mdev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg))) /
+ sizeof (struct mthca_data_seg));
mdev->limits.max_wqes = dev_lim->max_qp_sz;
mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp;
mdev->limits.reserved_qps = dev_lim->reserved_qps;
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index a8ad072be07..3f5f9487920 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/string.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index b224079d4e1..1f7d1a29d2a 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
#include <linux/mm.h>
@@ -109,7 +107,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m
{
struct page *page;
- page = alloc_pages(gfp_mask, order);
+ /*
+ * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+ * cleared, and subtle failures are seen if they aren't.
+ */
+ page = alloc_pages(gfp_mask | __GFP_ZERO, order);
if (!page)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index a1ab06847b7..da9b8f9b884 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
#ifndef MTHCA_MEMFREE_H
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 820205dec56..882e6b73591 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -29,8 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/slab.h>
@@ -91,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
spin_lock(&buddy->lock);
- for (o = order; o <= buddy->max_order; ++o) {
- m = 1 << (buddy->max_order - o);
- seg = find_first_bit(buddy->bits[o], m);
- if (seg < m)
- goto found;
- }
+ for (o = order; o <= buddy->max_order; ++o)
+ if (buddy->num_free[o]) {
+ m = 1 << (buddy->max_order - o);
+ seg = find_first_bit(buddy->bits[o], m);
+ if (seg < m)
+ goto found;
+ }
spin_unlock(&buddy->lock);
return -1;
found:
clear_bit(seg, buddy->bits[o]);
+ --buddy->num_free[o];
while (o > order) {
--o;
seg <<= 1;
set_bit(seg ^ 1, buddy->bits[o]);
+ ++buddy->num_free[o];
}
spin_unlock(&buddy->lock);
@@ -125,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
while (test_bit(seg ^ 1, buddy->bits[order])) {
clear_bit(seg ^ 1, buddy->bits[order]);
+ --buddy->num_free[order];
seg >>= 1;
++order;
}
set_bit(seg, buddy->bits[order]);
+ ++buddy->num_free[order];
spin_unlock(&buddy->lock);
}
@@ -143,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
GFP_KERNEL);
- if (!buddy->bits)
+ buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int),
+ GFP_KERNEL);
+ if (!buddy->bits || !buddy->num_free)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
@@ -156,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
}
set_bit(0, buddy->bits[buddy->max_order]);
+ buddy->num_free[buddy->max_order] = 1;
return 0;
@@ -163,9 +169,10 @@ err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
+err_out:
kfree(buddy->bits);
+ kfree(buddy->num_free);
-err_out:
return -ENOMEM;
}
@@ -177,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
kfree(buddy->bits[i]);
kfree(buddy->bits);
+ kfree(buddy->num_free);
}
static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index c1e950764bd..266f14e4740 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/errno.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 605a8d57fac..d168c254061 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -29,8 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/module.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
index e76cb62d8e3..62b009cc873 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.h
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -29,8 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_PROFILE_H
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index be34f99ca62..87ad889e367 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -32,8 +32,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_provider.c 4859 2006-01-09 21:55:10Z roland $
*/
#include <rdma/ib_smi.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 934bf954403..c621f8794b8 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -30,8 +30,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $
*/
#ifndef MTHCA_PROVIDER_H
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 09dc3614cf2..f5081bfde6d 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -31,8 +31,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
*/
#include <linux/string.h>
@@ -850,23 +848,6 @@ out:
return err;
}
-static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
-static const int dummy_init_attr_mask[] = {
- [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_QKEY),
- [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
- IB_QP_PORT |
- IB_QP_ACCESS_FLAGS),
- [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
- [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
- IB_QP_QKEY),
-};
-
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata)
{
@@ -928,15 +909,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
goto out;
}
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
- err = __mthca_modify_qp(ibqp, &dummy_init_attr,
- dummy_init_attr_mask[ibqp->qp_type],
- IB_QPS_RESET, IB_QPS_INIT);
- if (err)
- goto out;
- cur_state = IB_QPS_INIT;
- }
-
err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
out:
@@ -1277,10 +1249,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
return -EINVAL;
/*
- * For MLX transport we need 2 extra S/G entries:
+ * For MLX transport we need 2 extra send gather entries:
* one for the header and one for the checksum at the end
*/
- if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg)
+ if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)
return -EINVAL;
if (mthca_is_memfree(dev)) {
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 91934f2d9db..acb6817f606 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $
*/
#include <linux/init.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index a5ffff6e102..4fabe62aab8 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $
*/
#include <linux/slab.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 8b728486410..ca5900c96fc 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id$
*/
#include <asm/page.h> /* PAGE_SHIFT */
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index e1262c942db..5fe56e81073 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -29,7 +29,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
*/
#ifndef MTHCA_USER_H
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
index b3551a8dea1..341a5ae881c 100644
--- a/drivers/infiniband/hw/mthca/mthca_wqe.h
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -28,8 +28,6 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
- *
- * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $
*/
#ifndef MTHCA_WQE_H
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 9f7364a9096..b0cab64e5e3 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -91,10 +91,6 @@ unsigned int nes_debug_level = 0;
module_param_named(debug_level, nes_debug_level, uint, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug output level");
-unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
-module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR);
-MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation");
-
LIST_HEAD(nes_adapter_list);
static LIST_HEAD(nes_dev_list);
@@ -280,6 +276,7 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r
}
nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id);
+ nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
kfree(nesqp->allocated_buffer);
}
@@ -293,7 +290,6 @@ void nes_rem_ref(struct ib_qp *ibqp)
struct nes_qp *nesqp;
struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
struct nes_hw_cqp_wqe *cqp_wqe;
struct nes_cqp_request *cqp_request;
u32 opcode;
@@ -307,8 +303,6 @@ void nes_rem_ref(struct ib_qp *ibqp)
}
if (atomic_dec_and_test(&nesqp->refcount)) {
- nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
-
/* Destroy the QP */
cqp_request = nes_get_cqp_request(nesdev);
if (cqp_request == NULL) {
@@ -332,7 +326,7 @@ void nes_rem_ref(struct ib_qp *ibqp)
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
u64temp = (u64)nesqp->nesqp_context_pbase;
set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
}
}
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 1f9f7bf7386..39bd897b40c 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -94,9 +94,6 @@
#define MAX_DPC_ITERATIONS 128
-#define NES_CQP_REQUEST_NO_DOORBELL_RING 0
-#define NES_CQP_REQUEST_RING_DOORBELL 1
-
#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002
#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
@@ -173,7 +170,6 @@ extern int disable_mpa_crc;
extern unsigned int send_first;
extern unsigned int nes_drv_opt;
extern unsigned int nes_debug_level;
-extern unsigned int nes_lro_max_aggr;
extern struct list_head nes_adapter_list;
@@ -539,7 +535,11 @@ void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
-void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int);
+void nes_free_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request);
+void nes_put_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request);
+void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *);
int nes_arp_table(struct nes_device *, u32, u8 *, u32);
void nes_mh_fix(unsigned long);
void nes_clc(unsigned long);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 9a4b40fae40..9f0b964b2c9 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -74,36 +74,59 @@ atomic_t cm_nodes_destroyed;
atomic_t cm_accel_dropped_pkts;
atomic_t cm_resets_recvd;
-static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *);
+static inline int mini_cm_accelerated(struct nes_cm_core *,
+ struct nes_cm_node *);
static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *,
- struct nes_vnic *, struct nes_cm_info *);
-static int add_ref_cm_node(struct nes_cm_node *);
-static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
+ struct nes_vnic *, struct nes_cm_info *);
static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
-static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
- void *, u32, void *, u32, u8);
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
-
static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *,
- struct nes_vnic *,
- struct ietf_mpa_frame *,
- struct nes_cm_info *);
+ struct nes_vnic *, u16, void *, struct nes_cm_info *);
+static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
- struct nes_cm_node *);
+ struct nes_cm_node *);
static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
- struct nes_cm_node *);
-static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
- struct sk_buff *);
+ struct nes_cm_node *);
+static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
+ struct sk_buff *);
static int mini_cm_dealloc_core(struct nes_cm_core *);
static int mini_cm_get(struct nes_cm_core *);
static int mini_cm_set(struct nes_cm_core *, u32, u32);
+
+static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
+ void *, u32, void *, u32, u8);
+static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
+static int add_ref_cm_node(struct nes_cm_node *);
+static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
+
static int nes_cm_disconn_true(struct nes_qp *);
static int nes_cm_post_event(struct nes_cm_event *event);
static int nes_disconnect(struct nes_qp *nesqp, int abrupt);
static void nes_disconnect_worker(struct work_struct *work);
-static int send_ack(struct nes_cm_node *cm_node);
+
+static int send_mpa_request(struct nes_cm_node *, struct sk_buff *);
+static int send_syn(struct nes_cm_node *, u32, struct sk_buff *);
+static int send_reset(struct nes_cm_node *, struct sk_buff *);
+static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
+static void process_packet(struct nes_cm_node *, struct sk_buff *,
+ struct nes_cm_core *);
+
+static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
+static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
+static void cleanup_retrans_entry(struct nes_cm_node *);
+static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *,
+ enum nes_cm_event_type);
+static void free_retrans_entry(struct nes_cm_node *cm_node);
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb, int optionsize, int passive);
+
+/* CM event handler functions */
+static void cm_event_connected(struct nes_cm_event *);
+static void cm_event_connect_error(struct nes_cm_event *);
+static void cm_event_reset(struct nes_cm_event *);
+static void cm_event_mpa_req(struct nes_cm_event *);
+
+static void print_core(struct nes_cm_core *core);
/* External CM API Interface */
/* instance of function pointers for client API */
@@ -158,11 +181,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
event->cm_info.loc_port = cm_node->loc_port;
event->cm_info.cm_id = cm_node->cm_id;
- nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x],"
- " src_addr=%08x[%x]\n",
- event, type,
- event->cm_info.loc_addr, event->cm_info.loc_port,
- event->cm_info.rem_addr, event->cm_info.rem_port);
+ nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
+ "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
+ cm_node, event, type, event->cm_info.loc_addr,
+ event->cm_info.loc_port, event->cm_info.rem_addr,
+ event->cm_info.rem_port);
nes_cm_post_event(event);
return event;
@@ -172,14 +195,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
/**
* send_mpa_request
*/
-static int send_mpa_request(struct nes_cm_node *cm_node)
+static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
{
- struct sk_buff *skb;
int ret;
-
- skb = get_free_pkt(cm_node);
if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+ nes_debug(NES_DBG_CM, "skb set to NULL\n");
return -1;
}
@@ -188,9 +208,8 @@ static int send_mpa_request(struct nes_cm_node *cm_node)
cm_node->mpa_frame_size, SET_ACK);
ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
- if (ret < 0) {
+ if (ret < 0)
return ret;
- }
return 0;
}
@@ -229,46 +248,12 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len)
/**
- * handle_exception_pkt - process an exception packet.
- * We have been in a TSA state, and we have now received SW
- * TCP/IP traffic should be a FIN request or IP pkt with options
- */
-static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret = 0;
- struct tcphdr *tcph = tcp_hdr(skb);
-
- /* first check to see if this a FIN pkt */
- if (tcph->fin) {
- /* we need to ACK the FIN request */
- send_ack(cm_node);
-
- /* check which side we are (client/server) and set next state accordingly */
- if (cm_node->tcp_cntxt.client)
- cm_node->state = NES_CM_STATE_CLOSING;
- else {
- /* we are the server side */
- cm_node->state = NES_CM_STATE_CLOSE_WAIT;
- /* since this is a self contained CM we don't wait for */
- /* an APP to close us, just send final FIN immediately */
- ret = send_fin(cm_node, NULL);
- cm_node->state = NES_CM_STATE_LAST_ACK;
- }
- } else {
- ret = -EINVAL;
- }
-
- return ret;
-}
-
-
-/**
* form_cm_frame - get a free packet and build empty frame Use
* node info to build.
*/
-static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node,
- void *options, u32 optionsize, void *data,
- u32 datasize, u8 flags)
+static struct sk_buff *form_cm_frame(struct sk_buff *skb,
+ struct nes_cm_node *cm_node, void *options, u32 optionsize,
+ void *data, u32 datasize, u8 flags)
{
struct tcphdr *tcph;
struct iphdr *iph;
@@ -332,10 +317,12 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm
cm_node->tcp_cntxt.loc_seq_num++;
tcph->syn = 1;
} else
- cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */
+ cm_node->tcp_cntxt.loc_seq_num += datasize;
- if (flags & SET_FIN)
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
tcph->fin = 1;
+ }
if (flags & SET_RST)
tcph->rst = 1;
@@ -389,7 +376,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
int close_when_complete)
{
unsigned long flags;
- struct nes_cm_core *cm_core;
+ struct nes_cm_core *cm_core = cm_node->cm_core;
struct nes_timer_entry *new_send;
int ret = 0;
u32 was_timer_set;
@@ -411,7 +398,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
new_send->close_when_complete = close_when_complete;
if (type == NES_TIMER_TYPE_CLOSE) {
- new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */
+ new_send->timetosend += (HZ/10);
spin_lock_irqsave(&cm_node->recv_list_lock, flags);
list_add_tail(&new_send->list, &cm_node->recv_list);
spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
@@ -420,36 +407,28 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
if (type == NES_TIMER_TYPE_SEND) {
new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
atomic_inc(&new_send->skb->users);
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ cm_node->send_entry = new_send;
+ add_ref_cm_node(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
if (ret != NETDEV_TX_OK) {
- nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n",
- new_send, jiffies);
+ nes_debug(NES_DBG_CM, "Error sending packet %p "
+ "(jiffies = %lu)\n", new_send, jiffies);
atomic_dec(&new_send->skb->users);
new_send->timetosend = jiffies;
} else {
cm_packets_sent++;
if (!send_retrans) {
+ cleanup_retrans_entry(cm_node);
if (close_when_complete)
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- dev_kfree_skb_any(new_send->skb);
- kfree(new_send);
+ rem_ref_cm_node(cm_core, cm_node);
return ret;
}
- new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
}
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- list_add_tail(&new_send->list, &cm_node->retrans_list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- }
- if (type == NES_TIMER_TYPE_RECV) {
- new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
- new_send->timetosend = jiffies;
- spin_lock_irqsave(&cm_node->recv_list_lock, flags);
- list_add_tail(&new_send->list, &cm_node->recv_list);
- spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
}
- cm_core = cm_node->cm_core;
was_timer_set = timer_pending(&cm_core->tcp_timer);
@@ -476,23 +455,27 @@ static void nes_cm_timer_tick(unsigned long pass)
struct list_head *list_node, *list_node_temp;
struct nes_cm_core *cm_core = g_cm_core;
struct nes_qp *nesqp;
- struct sk_buff *skb;
u32 settimer = 0;
int ret = NETDEV_TX_OK;
- int node_done;
+ enum nes_cm_node_state last_state;
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+ list_for_each_safe(list_node, list_core_temp,
+ &cm_core->connected_nodes) {
cm_node = container_of(list_node, struct nes_cm_node, list);
add_ref_cm_node(cm_node);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
spin_lock_irqsave(&cm_node->recv_list_lock, flags);
- list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) {
- recv_entry = container_of(list_core, struct nes_timer_entry, list);
- if ((time_after(recv_entry->timetosend, jiffies)) &&
- (recv_entry->type == NES_TIMER_TYPE_CLOSE)) {
- if (nexttimeout > recv_entry->timetosend || !settimer) {
+ list_for_each_safe(list_core, list_node_temp,
+ &cm_node->recv_list) {
+ recv_entry = container_of(list_core,
+ struct nes_timer_entry, list);
+ if (!recv_entry)
+ break;
+ if (time_after(recv_entry->timetosend, jiffies)) {
+ if (nexttimeout > recv_entry->timetosend ||
+ !settimer) {
nexttimeout = recv_entry->timetosend;
settimer = 1;
}
@@ -501,157 +484,143 @@ static void nes_cm_timer_tick(unsigned long pass)
list_del(&recv_entry->list);
cm_id = cm_node->cm_id;
spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
- if (recv_entry->type == NES_TIMER_TYPE_CLOSE) {
- nesqp = (struct nes_qp *)recv_entry->skb;
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: "
- "****** HIT A NES_TIMER_TYPE_CLOSE"
- " with something to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- nesqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:"
- " ****** HIT A NES_TIMER_TYPE_CLOSE"
- " with nothing to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- nes_rem_ref(&nesqp->ibqp);
- }
- if (cm_id)
- cm_id->rem_ref(cm_id);
+ nesqp = (struct nes_qp *)recv_entry->skb;
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+ if (nesqp->cm_id) {
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
+ "refcount = %d: HIT A "
+ "NES_TIMER_TYPE_CLOSE with something "
+ "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+ atomic_read(&nesqp->refcount));
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+ nesqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&nesqp->lock,
+ qplockflags);
+ nes_cm_disconn(nesqp);
+ } else {
+ spin_unlock_irqrestore(&nesqp->lock,
+ qplockflags);
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
+ "refcount = %d: HIT A "
+ "NES_TIMER_TYPE_CLOSE with nothing "
+ "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+ atomic_read(&nesqp->refcount));
}
+ if (cm_id)
+ cm_id->rem_ref(cm_id);
+
kfree(recv_entry);
spin_lock_irqsave(&cm_node->recv_list_lock, flags);
}
spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- node_done = 0;
- list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) {
- if (node_done) {
- break;
- }
- send_entry = container_of(list_core, struct nes_timer_entry, list);
+ do {
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ continue;
if (time_after(send_entry->timetosend, jiffies)) {
if (cm_node->state != NES_CM_STATE_TSA) {
- if ((nexttimeout > send_entry->timetosend) || !settimer) {
- nexttimeout = send_entry->timetosend;
+ if ((nexttimeout >
+ send_entry->timetosend) ||
+ !settimer) {
+ nexttimeout =
+ send_entry->timetosend;
settimer = 1;
}
- node_done = 1;
continue;
} else {
- list_del(&send_entry->list);
- skb = send_entry->skb;
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- dev_kfree_skb_any(skb);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ free_retrans_entry(cm_node);
continue;
}
}
- if (send_entry->type == NES_TIMER_NODE_CLEANUP) {
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
- }
- if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) ||
- (cm_node->state == NES_CM_STATE_TSA) ||
- (cm_node->state == NES_CM_STATE_CLOSED)) {
- skb = send_entry->skb;
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- kfree(send_entry);
- dev_kfree_skb_any(skb);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+
+ if ((cm_node->state == NES_CM_STATE_TSA) ||
+ (cm_node->state == NES_CM_STATE_CLOSED)) {
+ free_retrans_entry(cm_node);
continue;
}
- if (!send_entry->retranscount || !send_entry->retrycount) {
+ if (!send_entry->retranscount ||
+ !send_entry->retrycount) {
cm_packets_dropped++;
- skb = send_entry->skb;
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- dev_kfree_skb_any(skb);
- kfree(send_entry);
- if (cm_node->state == NES_CM_STATE_SYN_RCVD) {
- /* this node never even generated an indication up to the cm */
+ last_state = cm_node->state;
+ cm_node->state = NES_CM_STATE_CLOSED;
+ free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(
+ &cm_node->retrans_list_lock, flags);
+ if (last_state == NES_CM_STATE_SYN_RCVD)
rem_ref_cm_node(cm_core, cm_node);
- } else {
- cm_node->state = NES_CM_STATE_CLOSED;
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- }
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ else
+ create_event(cm_node,
+ NES_CM_EVENT_ABORTED);
+ spin_lock_irqsave(&cm_node->retrans_list_lock,
+ flags);
continue;
}
- /* this seems like the correct place, but leave send entry unprotected */
- /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */
atomic_inc(&send_entry->skb->users);
cm_packets_retrans++;
- nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p,"
- " jiffies = %lu, time to send = %lu, retranscount = %u, "
- "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n",
- send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount,
- send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
+ "for node %p, jiffies = %lu, time to send = "
+ "%lu, retranscount = %u, send_entry->seq_num = "
+ "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
+ "0x%08X\n", send_entry, cm_node, jiffies,
+ send_entry->timetosend,
+ send_entry->retranscount,
+ send_entry->seq_num,
+ cm_node->tcp_cntxt.rem_ack_num);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock,
+ flags);
ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
if (ret != NETDEV_TX_OK) {
+ nes_debug(NES_DBG_CM, "rexmit failed for "
+ "node=%p\n", cm_node);
cm_packets_bounced++;
atomic_dec(&send_entry->skb->users);
send_entry->retrycount--;
nexttimeout = jiffies + NES_SHORT_TIME;
settimer = 1;
- node_done = 1;
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
continue;
} else {
cm_packets_sent++;
}
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- list_del(&send_entry->list);
- nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n",
- send_entry->retranscount, send_entry->retrycount);
+ nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
+ "%u, retry count = %u.\n",
+ send_entry->retranscount,
+ send_entry->retrycount);
if (send_entry->send_retrans) {
send_entry->retranscount--;
- send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT;
- if (nexttimeout > send_entry->timetosend || !settimer) {
+ send_entry->timetosend = jiffies +
+ NES_RETRY_TIMEOUT;
+ if (nexttimeout > send_entry->timetosend ||
+ !settimer) {
nexttimeout = send_entry->timetosend;
settimer = 1;
}
- list_add(&send_entry->list, &cm_node->retrans_list);
- continue;
} else {
int close_when_complete;
- skb = send_entry->skb;
- close_when_complete = send_entry->close_when_complete;
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- if (close_when_complete) {
- BUG_ON(atomic_read(&cm_node->ref_count) == 1);
- rem_ref_cm_node(cm_core, cm_node);
- }
- dev_kfree_skb_any(skb);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
+ close_when_complete =
+ send_entry->close_when_complete;
+ nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
+ cm_node, cm_node->state);
+ free_retrans_entry(cm_node);
+ if (close_when_complete)
+ rem_ref_cm_node(cm_node->cm_core,
+ cm_node);
}
- }
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-
- rem_ref_cm_node(cm_core, cm_node);
+ } while (0);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (ret != NETDEV_TX_OK)
+ if (ret != NETDEV_TX_OK) {
+ nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n",
+ cm_node);
break;
+ }
}
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
@@ -667,14 +636,14 @@ static void nes_cm_timer_tick(unsigned long pass)
/**
* send_syn
*/
-static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
+static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
+ struct sk_buff *skb)
{
int ret;
int flags = SET_SYN;
- struct sk_buff *skb;
char optionsbuffer[sizeof(struct option_mss) +
- sizeof(struct option_windowscale) +
- sizeof(struct option_base) + 1];
+ sizeof(struct option_windowscale) + sizeof(struct option_base) +
+ TCP_OPTIONS_PADDING];
int optionssize = 0;
/* Sending MSS option */
@@ -695,8 +664,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
optionssize += sizeof(struct option_windowscale);
- if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)
- ) {
+ if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) {
options = (union all_known_options *)&optionsbuffer[optionssize];
options->as_base.optionnum = OPTION_NUMBER_WRITE0;
options->as_base.length = sizeof(struct option_base);
@@ -714,7 +682,8 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
options->as_end = OPTION_NUMBER_END;
optionssize += 1;
- skb = get_free_pkt(cm_node);
+ if (!skb)
+ skb = get_free_pkt(cm_node);
if (!skb) {
nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
return -1;
@@ -733,18 +702,18 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
/**
* send_reset
*/
-static int send_reset(struct nes_cm_node *cm_node)
+static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb)
{
int ret;
- struct sk_buff *skb = get_free_pkt(cm_node);
int flags = SET_RST | SET_ACK;
+ if (!skb)
+ skb = get_free_pkt(cm_node);
if (!skb) {
nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
return -1;
}
- add_ref_cm_node(cm_node);
form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags);
ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1);
@@ -755,10 +724,12 @@ static int send_reset(struct nes_cm_node *cm_node)
/**
* send_ack
*/
-static int send_ack(struct nes_cm_node *cm_node)
+static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb)
{
int ret;
- struct sk_buff *skb = get_free_pkt(cm_node);
+
+ if (!skb)
+ skb = get_free_pkt(cm_node);
if (!skb) {
nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -922,7 +893,8 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
if (!cm_node || !cm_core)
return -EINVAL;
- nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n");
+ nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
+ cm_node);
/* first, make an index into our hash table */
hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr,
@@ -946,10 +918,35 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
* mini_cm_dec_refcnt_listen
*/
static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
- struct nes_cm_listener *listener, int free_hanging_nodes)
+ struct nes_cm_listener *listener, int free_hanging_nodes)
{
int ret = 1;
unsigned long flags;
+ struct list_head *list_pos = NULL;
+ struct list_head *list_temp = NULL;
+ struct nes_cm_node *cm_node = NULL;
+
+ nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
+ "refcnt=%d\n", listener, free_hanging_nodes,
+ atomic_read(&listener->ref_count));
+ /* free non-accelerated child nodes for this listener */
+ if (free_hanging_nodes) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_safe(list_pos, list_temp,
+ &g_cm_core->connected_nodes) {
+ cm_node = container_of(list_pos, struct nes_cm_node,
+ list);
+ if ((cm_node->listener == listener) &&
+ (!cm_node->accelerated)) {
+ cleanup_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_core->ht_lock,
+ flags);
+ send_reset(cm_node, NULL);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
if (!atomic_dec_return(&listener->ref_count)) {
list_del(&listener->list);
@@ -1067,18 +1064,18 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->loc_port = cm_info->loc_port;
cm_node->rem_port = cm_info->rem_port;
cm_node->send_write0 = send_first;
- nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n",
- HIPQUAD(cm_node->loc_addr), cm_node->loc_port,
- HIPQUAD(cm_node->rem_addr), cm_node->rem_port);
+ nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT
+ ":%x, rem = " NIPQUAD_FMT ":%x\n",
+ HIPQUAD(cm_node->loc_addr), cm_node->loc_port,
+ HIPQUAD(cm_node->rem_addr), cm_node->rem_port);
cm_node->listener = listener;
cm_node->netdev = nesvnic->netdev;
cm_node->cm_id = cm_info->cm_id;
memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
- nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n",
- cm_node->listener, cm_node->cm_id);
+ nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
+ cm_node->cm_id);
- INIT_LIST_HEAD(&cm_node->retrans_list);
spin_lock_init(&cm_node->retrans_list_lock);
INIT_LIST_HEAD(&cm_node->recv_list);
spin_lock_init(&cm_node->recv_list_lock);
@@ -1142,10 +1139,9 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node)
* rem_ref_cm_node - destroy an instance of a cm node
*/
static int rem_ref_cm_node(struct nes_cm_core *cm_core,
- struct nes_cm_node *cm_node)
+ struct nes_cm_node *cm_node)
{
unsigned long flags, qplockflags;
- struct nes_timer_entry *send_entry;
struct nes_timer_entry *recv_entry;
struct iw_cm_id *cm_id;
struct list_head *list_core, *list_node_temp;
@@ -1169,48 +1165,33 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
atomic_dec(&cm_node->listener->pend_accepts_cnt);
BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
}
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) {
- send_entry = container_of(list_core, struct nes_timer_entry, list);
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- dev_kfree_skb_any(send_entry->skb);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
- }
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-
+ BUG_ON(cm_node->send_entry);
spin_lock_irqsave(&cm_node->recv_list_lock, flags);
list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) {
- recv_entry = container_of(list_core, struct nes_timer_entry, list);
+ recv_entry = container_of(list_core, struct nes_timer_entry,
+ list);
list_del(&recv_entry->list);
cm_id = cm_node->cm_id;
spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
- if (recv_entry->type == NES_TIMER_TYPE_CLOSE) {
- nesqp = (struct nes_qp *)recv_entry->skb;
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
- " with something to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id);
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- nesqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
- " with nothing to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id);
- nes_rem_ref(&nesqp->ibqp);
- }
- cm_id->rem_ref(cm_id);
- } else if (recv_entry->type == NES_TIMER_TYPE_RECV) {
- dev_kfree_skb_any(recv_entry->skb);
+ nesqp = (struct nes_qp *)recv_entry->skb;
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+ if (nesqp->cm_id) {
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A "
+ "NES_TIMER_TYPE_CLOSE with something to do!\n",
+ nesqp->hwqp.qp_id, cm_id);
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+ nesqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_cm_disconn(nesqp);
+ } else {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A "
+ "NES_TIMER_TYPE_CLOSE with nothing to do!\n",
+ nesqp->hwqp.qp_id, cm_id);
}
+ cm_id->rem_ref(cm_id);
+
kfree(recv_entry);
spin_lock_irqsave(&cm_node->recv_list_lock, flags);
}
@@ -1221,23 +1202,31 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
} else {
if (cm_node->apbvt_set && cm_node->nesvnic) {
nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
- PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
+ PCI_FUNC(
+ cm_node->nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
}
}
- kfree(cm_node);
atomic_dec(&cm_core->node_cnt);
atomic_inc(&cm_nodes_destroyed);
+ nesqp = cm_node->nesqp;
+ if (nesqp) {
+ nesqp->cm_node = NULL;
+ nes_rem_ref(&nesqp->ibqp);
+ cm_node->nesqp = NULL;
+ }
+ cm_node->freed = 1;
+ kfree(cm_node);
return 0;
}
-
/**
* process_options
*/
-static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet)
+static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
+ u32 optionsize, u32 syn_packet)
{
u32 tmp;
u32 offset = 0;
@@ -1247,35 +1236,37 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti
while (offset < optionsize) {
all_options = (union all_known_options *)(optionsloc + offset);
switch (all_options->as_base.optionnum) {
- case OPTION_NUMBER_END:
- offset = optionsize;
- break;
- case OPTION_NUMBER_NONE:
- offset += 1;
- continue;
- case OPTION_NUMBER_MSS:
- nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n",
- __func__,
- all_options->as_mss.length, offset, optionsize);
- got_mss_option = 1;
- if (all_options->as_mss.length != 4) {
- return 1;
- } else {
- tmp = ntohs(all_options->as_mss.mss);
- if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
- cm_node->tcp_cntxt.mss = tmp;
- }
- break;
- case OPTION_NUMBER_WINDOW_SCALE:
- cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount;
- break;
- case OPTION_NUMBER_WRITE0:
- cm_node->send_write0 = 1;
- break;
- default:
- nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
- all_options->as_base.optionnum);
- break;
+ case OPTION_NUMBER_END:
+ offset = optionsize;
+ break;
+ case OPTION_NUMBER_NONE:
+ offset += 1;
+ continue;
+ case OPTION_NUMBER_MSS:
+ nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
+ "Size: %d\n", __func__,
+ all_options->as_mss.length, offset, optionsize);
+ got_mss_option = 1;
+ if (all_options->as_mss.length != 4) {
+ return 1;
+ } else {
+ tmp = ntohs(all_options->as_mss.mss);
+ if (tmp > 0 && tmp <
+ cm_node->tcp_cntxt.mss)
+ cm_node->tcp_cntxt.mss = tmp;
+ }
+ break;
+ case OPTION_NUMBER_WINDOW_SCALE:
+ cm_node->tcp_cntxt.snd_wscale =
+ all_options->as_windowscale.shiftcount;
+ break;
+ case OPTION_NUMBER_WRITE0:
+ cm_node->send_write0 = 1;
+ break;
+ default:
+ nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
+ all_options->as_base.optionnum);
+ break;
}
offset += all_options->as_base.length;
}
@@ -1284,300 +1275,491 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti
return 0;
}
+static void drop_packet(struct sk_buff *skb)
+{
+ atomic_inc(&cm_accel_dropped_pkts);
+ dev_kfree_skb_any(skb);
+}
-/**
- * process_packet
- */
-static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct nes_cm_core *cm_core)
+static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
{
- int optionsize;
- int datasize;
- int ret = 0;
- struct tcphdr *tcph = tcp_hdr(skb);
- u32 inc_sequence;
- if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) {
- inc_sequence = ntohl(tcph->seq);
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence;
+ atomic_inc(&cm_resets_recvd);
+ nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
+ "refcnt=%d\n", cm_node, cm_node->state,
+ atomic_read(&cm_node->ref_count));
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_MPAREQ_SENT:
+ cm_node->state = NES_CM_STATE_LAST_ACK;
+ send_fin(cm_node, skb);
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ cm_node->state = NES_CM_STATE_CLOSING;
+ send_ack(cm_node, skb);
+ break;
+ case NES_CM_STATE_FIN_WAIT2:
+ cm_node->state = NES_CM_STATE_TIME_WAIT;
+ send_ack(cm_node, skb);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n",
+ cm_node, cm_node->state);
+ drop_packet(skb);
+ break;
}
+}
- if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) {
- BUG_ON(!tcph);
- atomic_inc(&cm_accel_dropped_pkts);
- return -1;
- }
- if (tcph->rst) {
- atomic_inc(&cm_resets_recvd);
- nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n",
- cm_node, cm_node->state, atomic_read(&cm_node->ref_count));
- switch (cm_node->state) {
- case NES_CM_STATE_LISTENING:
- rem_ref_cm_node(cm_core, cm_node);
- break;
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_CLOSED:
- break;
- case NES_CM_STATE_SYN_RCVD:
- nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X,"
- " remote 0x%08X:%04X, node state = %u\n",
- cm_node->loc_addr, cm_node->loc_port,
- cm_node->rem_addr, cm_node->rem_port,
- cm_node->state);
- rem_ref_cm_node(cm_core, cm_node);
- break;
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_MPAREQ_SENT:
- default:
- nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X,"
- " remote 0x%08X:%04X, node state = %u refcnt=%d\n",
- cm_node->loc_addr, cm_node->loc_port,
- cm_node->rem_addr, cm_node->rem_port,
- cm_node->state, atomic_read(&cm_node->ref_count));
- /* create event */
- cm_node->state = NES_CM_STATE_CLOSED;
+static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- break;
+ int reset = 0; /* whether to send reset in case of err.. */
+ atomic_inc(&cm_resets_recvd);
+ nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
+ " refcnt=%d\n", cm_node, cm_node->state,
+ atomic_read(&cm_node->ref_count));
+ cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ active_open_err(cm_node, skb, reset);
+ break;
+ /* For PASSIVE open states, remove the cm_node event */
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_LISTENING:
+ nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
+ passive_open_err(cm_node, skb, reset);
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ break;
+ }
+}
+static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ enum nes_cm_event_type type)
+{
+
+ int ret;
+ int datasize = skb->len;
+ u8 *dataloc = skb->data;
+ ret = parse_mpa(cm_node, dataloc, datasize);
+ if (ret < 0) {
+ nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
+ if (type == NES_CM_EVENT_CONNECTED) {
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for "
+ "cm_node=%p listener=%p state=%d\n", __func__,
+ __LINE__, cm_node, cm_node->listener,
+ cm_node->state);
+ active_open_err(cm_node, skb, 1);
+ } else {
+ passive_open_err(cm_node, skb, 1);
}
- return -1;
+ } else {
+ cleanup_retrans_entry(cm_node);
+ dev_kfree_skb_any(skb);
+ if (type == NES_CM_EVENT_CONNECTED)
+ cm_node->state = NES_CM_STATE_TSA;
+ create_event(cm_node, type);
+
+ }
+ return ;
+}
+
+static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ active_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_SYN_RCVD:
+ passive_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ drop_packet(skb);
}
+}
+
+static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1;
+ if (err)
+ active_open_err(cm_node, skb, 1);
+
+ return err;
+}
+
+static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb)
+{
+ int err = 0;
+ u32 seq;
+ u32 ack_seq;
+ u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+ u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ u32 rcv_wnd;
+ seq = ntohl(tcph->seq);
+ ack_seq = ntohl(tcph->ack_seq);
+ rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ if (ack_seq != loc_seq_num)
+ err = 1;
+ else if ((seq + rcv_wnd) < rcv_nxt)
+ err = 1;
+ if (err) {
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ indicate_pkt_err(cm_node, skb);
+ nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
+ "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
+ rcv_wnd);
+ }
+ return err;
+}
+
+/*
+ * handle_syn_pkt() is for Passive node. The syn packet is received when a node
+ * is created with a listener, or it may come in as a retransmitted packet, in
+ * which case it will just be dropped.
+ */
+
+static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ skb_pull(skb, tcph->doff << 2);
+ inc_sequence = ntohl(tcph->seq);
- skb_pull(skb, ip_hdr(skb)->ihl << 2);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ /* Rcvd syn on active open connection*/
+ active_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_LISTENING:
+ /* Passive OPEN */
+ cm_node->accept_pend = 1;
+ atomic_inc(&cm_node->listener->pend_accepts_cnt);
+ if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+ cm_node->listener->backlog) {
+ nes_debug(NES_DBG_CM, "drop syn due to backlog "
+ "pressure \n");
+ cm_backlog_drops++;
+ passive_open_err(cm_node, skb, 0);
+ break;
+ }
+ ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
+ 1);
+ if (ret) {
+ passive_open_err(cm_node, skb, 0);
+ /* drop pkt */
+ break;
+ }
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ BUG_ON(cm_node->send_entry);
+ cm_node->state = NES_CM_STATE_SYN_RCVD;
+ send_syn(cm_node, 1, skb);
+ break;
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_CLOSED:
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
+
+static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
skb_pull(skb, tcph->doff << 2);
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ /* active open */
+ if (check_syn(cm_node, tcph, skb))
+ return;
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* setup options */
+ ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
+ if (ret) {
+ nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
+ cm_node);
+ break;
+ }
+ cleanup_retrans_entry(cm_node);
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ send_mpa_request(cm_node, skb);
+ cm_node->state = NES_CM_STATE_MPAREQ_SENT;
+ break;
+ case NES_CM_STATE_MPAREQ_RCVD:
+ /* passive open, so should not be here */
+ passive_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_CLOSED:
+ case NES_CM_STATE_MPAREQ_SENT:
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
- datasize = skb->len;
+static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ int datasize = 0;
+ u32 inc_sequence;
+ u32 rem_seq_ack;
+ u32 rem_seq;
+ if (check_seq(cm_node, tcph, skb))
+ return;
+
+ skb_pull(skb, tcph->doff << 2);
inc_sequence = ntohl(tcph->seq);
- nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X,"
- " rcv_nxt = 0x%08X Flags: %s %s.\n",
- datasize, inc_sequence, ntohl(tcph->ack_seq),
- cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""),
- (tcph->ack ? "ACK":""));
-
- if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt)
- ) {
- nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X,"
- " ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n",
- datasize, inc_sequence, ntohl(tcph->ack_seq),
- cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":""));
- if (cm_node->state == NES_CM_STATE_LISTENING) {
- rem_ref_cm_node(cm_core, cm_node);
+ rem_seq = ntohl(tcph->seq);
+ rem_seq_ack = ntohl(tcph->ack_seq);
+ datasize = skb->len;
+
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_RCVD:
+ /* Passive OPEN */
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ cm_node->state = NES_CM_STATE_ESTABLISHED;
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
+ handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_MPA_REQ);
+ } else { /* rcvd ACK only */
+ dev_kfree_skb_any(skb);
+ cleanup_retrans_entry(cm_node);
+ }
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ /* Passive OPEN */
+ /* We expect mpa frame to be received only */
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
+ handle_rcv_mpa(cm_node, skb,
+ NES_CM_EVENT_MPA_REQ);
+ } else
+ drop_packet(skb);
+ break;
+ case NES_CM_STATE_MPAREQ_SENT:
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_CONNECTED);
+ } else { /* Could be just an ack pkt.. */
+ cleanup_retrans_entry(cm_node);
+ dev_kfree_skb_any(skb);
}
- return -1;
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_CLOSED:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ default:
+ drop_packet(skb);
+ break;
}
+}
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb, int optionsize, int passive)
+{
+ u8 *optionsloc = (u8 *)&tcph[1];
if (optionsize) {
- u8 *optionsloc = (u8 *)&tcph[1];
- if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) {
- nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node);
- send_reset(cm_node);
- if (cm_node->state != NES_CM_STATE_SYN_SENT)
- rem_ref_cm_node(cm_core, cm_node);
- return 0;
+ if (process_options(cm_node, optionsloc, optionsize,
+ (u32)tcph->syn)) {
+ nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
+ __func__, cm_node);
+ if (passive)
+ passive_open_err(cm_node, skb, 0);
+ else
+ active_open_err(cm_node, skb, 0);
+ return 1;
}
- } else if (tcph->syn)
- cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS;
+ }
cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
cm_node->tcp_cntxt.snd_wscale;
- if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) {
+ if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
- }
+ return 0;
+}
- if (tcph->ack) {
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- /* read and stash current sequence number */
- if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) {
- nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !="
- " cm_node->tcp_cntxt.loc_seq_num\n");
- send_reset(cm_node);
- return 0;
- }
- if (cm_node->state == NES_CM_STATE_SYN_SENT)
- cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED;
- else {
- cm_node->state = NES_CM_STATE_ESTABLISHED;
- }
- break;
- case NES_CM_STATE_LAST_ACK:
- cm_node->state = NES_CM_STATE_CLOSED;
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cm_node->state = NES_CM_STATE_FIN_WAIT2;
- break;
- case NES_CM_STATE_CLOSING:
- cm_node->state = NES_CM_STATE_TIME_WAIT;
- /* need to schedule this to happen in 2MSL timeouts */
- cm_node->state = NES_CM_STATE_CLOSED;
- break;
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_MPAREQ_SENT:
- case NES_CM_STATE_CLOSE_WAIT:
- case NES_CM_STATE_TIME_WAIT:
- case NES_CM_STATE_CLOSED:
- break;
- case NES_CM_STATE_LISTENING:
- nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn);
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- send_reset(cm_node);
- /* send_reset bumps refcount, this should have been a new node */
- rem_ref_cm_node(cm_core, cm_node);
- return -1;
- break;
- case NES_CM_STATE_TSA:
- nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n");
- break;
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_FIN_WAIT2:
- default:
- nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n",
- cm_node->state);
- send_reset(cm_node);
- break;
- }
- }
+/*
+ * active_open_err() will call send_reset() if the reset flag is set,
+ * otherwise it frees the skb. It will also send the ABORTED event.
+ */
- if (tcph->syn) {
- if (cm_node->state == NES_CM_STATE_LISTENING) {
- /* do not exceed backlog */
- atomic_inc(&cm_node->listener->pend_accepts_cnt);
- if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
- cm_node->listener->backlog) {
- nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n");
- cm_backlog_drops++;
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- rem_ref_cm_node(cm_core, cm_node);
- return 0;
- }
- cm_node->accept_pend = 1;
+static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ int reset)
+{
+ cleanup_retrans_entry(cm_node);
+ if (reset) {
+ nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
+ "state=%d\n", cm_node, cm_node->state);
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, skb);
+ } else
+ dev_kfree_skb_any(skb);
- }
- if (datasize == 0)
- cm_node->tcp_cntxt.rcv_nxt ++;
+ cm_node->state = NES_CM_STATE_CLOSED;
+ create_event(cm_node, NES_CM_EVENT_ABORTED);
+}
- if (cm_node->state == NES_CM_STATE_LISTENING) {
- cm_node->state = NES_CM_STATE_SYN_RCVD;
- send_syn(cm_node, 1);
- }
- if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) {
- cm_node->state = NES_CM_STATE_ESTABLISHED;
- /* send final handshake ACK */
- ret = send_ack(cm_node);
- if (ret < 0)
- return ret;
+/*
+ * passive_open_err() will either call send_reset() or will free up the skb
+ * and drop a reference on the cm_node.
+ */
- cm_node->state = NES_CM_STATE_MPAREQ_SENT;
- ret = send_mpa_request(cm_node);
- if (ret < 0)
- return ret;
- }
+static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ int reset)
+{
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ if (reset) {
+ nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
+ "cm_node=%p state =%d\n", cm_node, cm_node->state);
+ send_reset(cm_node, skb);
+ } else {
+ dev_kfree_skb_any(skb);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
}
+}
- if (tcph->fin) {
- cm_node->tcp_cntxt.rcv_nxt++;
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_MPAREQ_SENT:
- cm_node->state = NES_CM_STATE_CLOSE_WAIT;
- cm_node->state = NES_CM_STATE_LAST_ACK;
- ret = send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cm_node->state = NES_CM_STATE_CLOSING;
- ret = send_ack(cm_node);
- break;
- case NES_CM_STATE_FIN_WAIT2:
- cm_node->state = NES_CM_STATE_TIME_WAIT;
- cm_node->tcp_cntxt.loc_seq_num ++;
- ret = send_ack(cm_node);
- /* need to schedule this to happen in 2MSL timeouts */
- cm_node->state = NES_CM_STATE_CLOSED;
- break;
- case NES_CM_STATE_CLOSE_WAIT:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- case NES_CM_STATE_TSA:
- default:
- nes_debug(NES_DBG_CM, "Received a fin while in %x state\n",
- cm_node->state);
- ret = -EINVAL;
- break;
- }
+/*
+ * free_retrans_entry() assumes that the retrans_list_lock has
+ * been acquired before calling.
+ */
+static void free_retrans_entry(struct nes_cm_node *cm_node)
+{
+ struct nes_timer_entry *send_entry;
+ send_entry = cm_node->send_entry;
+ if (send_entry) {
+ cm_node->send_entry = NULL;
+ dev_kfree_skb_any(send_entry->skb);
+ kfree(send_entry);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
}
+}
- if (datasize) {
- u8 *dataloc = skb->data;
- /* figure out what state we are in and handle transition to next state */
- switch (cm_node->state) {
- case NES_CM_STATE_LISTENING:
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_CLOSE_WAIT:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- /* recv the mpa res frame, ret=frame len (incl priv data) */
- ret = parse_mpa(cm_node, dataloc, datasize);
- if (ret < 0)
- break;
- /* set the req frame payload len in skb */
- /* we are done handling this state, set node to a TSA state */
- cm_node->state = NES_CM_STATE_TSA;
- send_ack(cm_node);
- create_event(cm_node, NES_CM_EVENT_CONNECTED);
- break;
-
- case NES_CM_STATE_ESTABLISHED:
- /* we are expecting an MPA req frame */
- ret = parse_mpa(cm_node, dataloc, datasize);
- if (ret < 0) {
- break;
- }
- cm_node->state = NES_CM_STATE_TSA;
- send_ack(cm_node);
- /* we got a valid MPA request, create an event */
- create_event(cm_node, NES_CM_EVENT_MPA_REQ);
- break;
- case NES_CM_STATE_TSA:
- handle_exception_pkt(cm_node, skb);
- break;
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- default:
- ret = -1;
- }
- }
+static void cleanup_retrans_entry(struct nes_cm_node *cm_node)
+{
+ unsigned long flags;
- return ret;
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
}
+/**
+ * process_packet
+ * Classifies the packet by its TCP flags and hands the skb to the matching handler; returns nothing.
+ */
+static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct nes_cm_core *cm_core)
+{
+ enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
+ struct tcphdr *tcph = tcp_hdr(skb);
+ skb_pull(skb, ip_hdr(skb)->ihl << 2);
+
+ nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
+ "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
+ tcph->ack, tcph->rst, tcph->fin);
+
+ if (tcph->rst)
+ pkt_type = NES_PKT_TYPE_RST;
+ else if (tcph->syn) {
+ pkt_type = NES_PKT_TYPE_SYN;
+ if (tcph->ack)
+ pkt_type = NES_PKT_TYPE_SYNACK;
+ } else if (tcph->fin)
+ pkt_type = NES_PKT_TYPE_FIN;
+ else if (tcph->ack)
+ pkt_type = NES_PKT_TYPE_ACK;
+
+ switch (pkt_type) {
+ case NES_PKT_TYPE_SYN:
+ handle_syn_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_SYNACK:
+ handle_synack_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_ACK:
+ handle_ack_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_RST:
+ handle_rst_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_FIN:
+ handle_fin_pkt(cm_node, skb, tcph);
+ break;
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
/**
* mini_cm_listen - create a listen node with params
*/
static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+ struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
{
struct nes_cm_listener *listener;
unsigned long flags;
@@ -1603,7 +1785,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
return NULL;
}
- memset(listener, 0, sizeof(struct nes_cm_listener));
listener->loc_addr = htonl(cm_info->loc_addr);
listener->loc_port = htons(cm_info->loc_port);
listener->reused_node = 0;
@@ -1645,37 +1826,36 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
/**
* mini_cm_connect - make a connection node with params
*/
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic,
- struct ietf_mpa_frame *mpa_frame,
- struct nes_cm_info *cm_info)
+struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
+ struct nes_vnic *nesvnic, u16 private_data_len,
+ void *private_data, struct nes_cm_info *cm_info)
{
int ret = 0;
struct nes_cm_node *cm_node;
struct nes_cm_listener *loopbackremotelistener;
struct nes_cm_node *loopbackremotenode;
struct nes_cm_info loopback_cm_info;
-
- u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) +
- ntohs(mpa_frame->priv_data_len);
-
- cm_info->loc_addr = htonl(cm_info->loc_addr);
- cm_info->rem_addr = htonl(cm_info->rem_addr);
- cm_info->loc_port = htons(cm_info->loc_port);
- cm_info->rem_port = htons(cm_info->rem_port);
+ u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len;
+ struct ietf_mpa_frame *mpa_frame = NULL;
/* create a CM connection node */
cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
if (!cm_node)
return NULL;
+ mpa_frame = &cm_node->mpa_frame;
+ strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ);
+ mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+ mpa_frame->rev = IETF_MPA_VERSION;
+ mpa_frame->priv_data_len = htons(private_data_len);
/* set our node side to client (active) side */
cm_node->tcp_cntxt.client = 1;
cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
if (cm_info->loc_addr == cm_info->rem_addr) {
- loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr,
- cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE);
+ loopbackremotelistener = find_listener(cm_core,
+ ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
+ NES_CM_LISTENER_ACTIVE_STATE);
if (loopbackremotelistener == NULL) {
create_event(cm_node, NES_CM_EVENT_ABORTED);
} else {
@@ -1684,26 +1864,35 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
loopback_cm_info.loc_port = cm_info->rem_port;
loopback_cm_info.rem_port = cm_info->loc_port;
loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
- loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info,
- loopbackremotelistener);
+ loopbackremotenode = make_cm_node(cm_core, nesvnic,
+ &loopback_cm_info, loopbackremotelistener);
loopbackremotenode->loopbackpartner = cm_node;
- loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
+ loopbackremotenode->tcp_cntxt.rcv_wscale =
+ NES_CM_DEFAULT_RCV_WND_SCALE;
cm_node->loopbackpartner = loopbackremotenode;
- memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data,
- mpa_frame_size);
- loopbackremotenode->mpa_frame_size = mpa_frame_size -
- sizeof(struct ietf_mpa_frame);
+ memcpy(loopbackremotenode->mpa_frame_buf, private_data,
+ private_data_len);
+ loopbackremotenode->mpa_frame_size = private_data_len;
- /* we are done handling this state, set node to a TSA state */
+ /* we are done handling this state. */
+ /* set node to a TSA state */
cm_node->state = NES_CM_STATE_TSA;
- cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num;
- loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num;
- cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale;
- loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
+ cm_node->tcp_cntxt.rcv_nxt =
+ loopbackremotenode->tcp_cntxt.loc_seq_num;
+ loopbackremotenode->tcp_cntxt.rcv_nxt =
+ cm_node->tcp_cntxt.loc_seq_num;
+ cm_node->tcp_cntxt.max_snd_wnd =
+ loopbackremotenode->tcp_cntxt.rcv_wnd;
+ loopbackremotenode->tcp_cntxt.max_snd_wnd =
+ cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wnd =
+ loopbackremotenode->tcp_cntxt.rcv_wnd;
+ loopbackremotenode->tcp_cntxt.snd_wnd =
+ cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wscale =
+ loopbackremotenode->tcp_cntxt.rcv_wscale;
+ loopbackremotenode->tcp_cntxt.snd_wscale =
+ cm_node->tcp_cntxt.rcv_wscale;
create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ);
}
@@ -1713,16 +1902,29 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
/* set our node side to client (active) side */
cm_node->tcp_cntxt.client = 1;
/* init our MPA frame ptr */
- memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size);
+ memcpy(mpa_frame->priv_data, private_data, private_data_len);
+
cm_node->mpa_frame_size = mpa_frame_size;
/* send a syn and goto syn sent state */
cm_node->state = NES_CM_STATE_SYN_SENT;
- ret = send_syn(cm_node, 0);
+ ret = send_syn(cm_node, 0, NULL);
+
+ if (ret) {
+ /* sending the SYN failed; release the cm_node reference */
+ nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
+ "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_addr, cm_node->rem_port, cm_node,
+ cm_node->cm_id);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ cm_node = NULL;
+ }
- nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x,"
- " cm_node=%p, cm_id = %p.\n",
- cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id);
+ if (cm_node)
+ nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
+ "port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_addr, cm_node->rem_port, cm_node,
+ cm_node->cm_id);
return cm_node;
}
@@ -1732,8 +1934,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
* mini_cm_accept - accept a connection
* This function is never called
*/
-static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame,
- struct nes_cm_node *cm_node)
+static int mini_cm_accept(struct nes_cm_core *cm_core,
+ struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
{
return 0;
}
@@ -1743,32 +1945,26 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mp
* mini_cm_reject - reject and teardown a connection
*/
static int mini_cm_reject(struct nes_cm_core *cm_core,
- struct ietf_mpa_frame *mpa_frame,
- struct nes_cm_node *cm_node)
+ struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
{
int ret = 0;
- struct sk_buff *skb;
- u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) +
- ntohs(mpa_frame->priv_data_len);
- skb = get_free_pkt(cm_node);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
-
- /* send an MPA Request frame */
- form_cm_frame(skb, cm_node, NULL, 0, mpa_frame, mpa_frame_size, SET_ACK | SET_FIN);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
+ nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
+ __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
+ if (cm_node->tcp_cntxt.client)
+ return ret;
+ cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
ret = send_fin(cm_node, NULL);
- if (ret < 0) {
- printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n");
- return ret;
+ if (cm_node->accept_pend) {
+ BUG_ON(!cm_node->listener);
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
}
+ ret = send_reset(cm_node, NULL);
return ret;
}
@@ -1784,35 +1980,39 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
return -EINVAL;
switch (cm_node->state) {
- /* if passed in node is null, create a reference key node for node search */
- /* check if we found an owner node for this pkt */
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_MPAREQ_SENT:
- cm_node->state = NES_CM_STATE_FIN_WAIT1;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_CLOSE_WAIT:
- cm_node->state = NES_CM_STATE_LAST_ACK;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_TIME_WAIT:
- case NES_CM_STATE_CLOSING:
- ret = -1;
- break;
- case NES_CM_STATE_LISTENING:
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- case NES_CM_STATE_CLOSED:
- case NES_CM_STATE_TSA:
- ret = rem_ref_cm_node(cm_core, cm_node);
- break;
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_ACCEPTING:
+ case NES_CM_STATE_MPAREQ_SENT:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ cleanup_retrans_entry(cm_node);
+ send_reset(cm_node, NULL);
+ break;
+ case NES_CM_STATE_CLOSE_WAIT:
+ cm_node->state = NES_CM_STATE_LAST_ACK;
+ send_fin(cm_node, NULL);
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_TIME_WAIT:
+ case NES_CM_STATE_CLOSING:
+ ret = -1;
+ break;
+ case NES_CM_STATE_LISTENING:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_INITED:
+ case NES_CM_STATE_CLOSED:
+ ret = rem_ref_cm_node(cm_core, cm_node);
+ break;
+ case NES_CM_STATE_TSA:
+ if (cm_node->send_entry)
+ printk(KERN_ERR "ERROR Close got called from STATE_TSA "
+ "send_entry=%p\n", cm_node->send_entry);
+ ret = rem_ref_cm_node(cm_core, cm_node);
+ break;
}
cm_node->cm_id = NULL;
return ret;
@@ -1823,25 +2023,30 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
* recv_pkt - recv an ETHERNET packet, and process it through CM
* node state machine
*/
-static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic,
- struct sk_buff *skb)
+static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
+ struct nes_vnic *nesvnic, struct sk_buff *skb)
{
struct nes_cm_node *cm_node = NULL;
struct nes_cm_listener *listener = NULL;
struct iphdr *iph;
struct tcphdr *tcph;
struct nes_cm_info nfo;
- int ret = 0;
- if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
- ret = -EINVAL;
- goto out;
+ if (!skb)
+ return;
+ if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
+ dev_kfree_skb_any(skb);
+ return;
}
iph = (struct iphdr *)skb->data;
tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
skb_reset_network_header(skb);
skb_set_transport_header(skb, sizeof(*tcph));
+ if (!tcph) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
skb->len = ntohs(iph->tot_len);
nfo.loc_addr = ntohl(iph->daddr);
@@ -1854,61 +2059,60 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni
NIPQUAD(iph->daddr), tcph->dest,
NIPQUAD(iph->saddr), tcph->source);
- /* note: this call is going to increment cm_node ref count */
- cm_node = find_node(cm_core,
+ do {
+ cm_node = find_node(cm_core,
nfo.rem_port, nfo.rem_addr,
nfo.loc_port, nfo.loc_addr);
- if (!cm_node) {
- listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port,
- NES_CM_LISTENER_ACTIVE_STATE);
- if (listener) {
- nfo.cm_id = listener->cm_id;
- nfo.conn_type = listener->conn_type;
- } else {
- nfo.cm_id = NULL;
- nfo.conn_type = 0;
- }
-
- cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener);
if (!cm_node) {
- nes_debug(NES_DBG_CM, "Unable to allocate node\n");
+ /* The only packets accepted here are for */
+ /* a PASSIVE open (SYN without ACK) */
+ if ((!tcph->syn) || (tcph->ack)) {
+ cm_packets_dropped++;
+ break;
+ }
+ listener = find_listener(cm_core, nfo.loc_addr,
+ nfo.loc_port,
+ NES_CM_LISTENER_ACTIVE_STATE);
if (listener) {
- nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n");
+ nfo.cm_id = listener->cm_id;
+ nfo.conn_type = listener->conn_type;
+ } else {
+ nes_debug(NES_DBG_CM, "Unable to find listener "
+ "for the pkt\n");
+ cm_packets_dropped++;
+ dev_kfree_skb_any(skb);
+ break;
+ }
+
+ cm_node = make_cm_node(cm_core, nesvnic, &nfo,
+ listener);
+ if (!cm_node) {
+ nes_debug(NES_DBG_CM, "Unable to allocate "
+ "node\n");
+ cm_packets_dropped++;
atomic_dec(&listener->ref_count);
+ dev_kfree_skb_any(skb);
+ break;
}
- ret = -1;
- goto out;
- }
- if (!listener) {
- nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n",
- nfo.loc_port, atomic_read(&cm_node->ref_count));
- if (!tcph->rst) {
- nes_debug(NES_DBG_CM, "Packet found for unknown port=%d"
- " rem_port=%d refcnt=%d\n",
- nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count));
-
- cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq);
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- send_reset(cm_node);
+ if (!tcph->rst && !tcph->fin) {
+ cm_node->state = NES_CM_STATE_LISTENING;
+ } else {
+ cm_packets_dropped++;
+ rem_ref_cm_node(cm_core, cm_node);
+ dev_kfree_skb_any(skb);
+ break;
}
+ add_ref_cm_node(cm_node);
+ } else if (cm_node->state == NES_CM_STATE_TSA) {
rem_ref_cm_node(cm_core, cm_node);
- ret = -1;
- goto out;
+ atomic_inc(&cm_accel_dropped_pkts);
+ dev_kfree_skb_any(skb);
+ break;
}
- add_ref_cm_node(cm_node);
- cm_node->state = NES_CM_STATE_LISTENING;
- }
-
- nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n",
- cm_node, skb->data);
- process_packet(cm_node, skb, cm_core);
-
- rem_ref_cm_node(cm_core, cm_node);
- out:
- if (skb)
- dev_kfree_skb_any(skb);
- return ret;
+ process_packet(cm_node, skb, cm_core);
+ rem_ref_cm_node(cm_core, cm_node);
+ } while (0);
}
@@ -2108,15 +2312,12 @@ int nes_cm_disconn(struct nes_qp *nesqp)
if (nesqp->disconn_pending == 0) {
nesqp->disconn_pending++;
spin_unlock_irqrestore(&nesqp->lock, flags);
- /* nes_add_ref(&nesqp->ibqp); */
/* init our disconnect work element, to */
INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work);
- } else {
+ } else
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_rem_ref(&nesqp->ibqp);
- }
return 0;
}
@@ -2162,7 +2363,6 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
nesqp->hwqp.qp_id);
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_rem_ref(&nesqp->ibqp);
return -1;
}
@@ -2183,30 +2383,31 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
atomic_inc(&cm_disconnects);
cm_event.event = IW_CM_EVENT_DISCONNECT;
if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
- issued_disconnect_reset = 1;
cm_event.status = IW_CM_EVENT_STATUS_RESET;
- nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for "
- " QP%u, cm_id = %p. \n",
- nesqp->hwqp.qp_id, cm_id);
- } else {
+ nes_debug(NES_DBG_CM, "Generating a CM "
+ "Disconnect Event (status reset) for "
+ "QP%u, cm_id = %p. \n",
+ nesqp->hwqp.qp_id, cm_id);
+ } else
cm_event.status = IW_CM_EVENT_STATUS_OK;
- }
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
- nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for "
- " QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n",
- nesqp->hwqp.qp_id,
- nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id,
- atomic_read(&nesqp->refcount));
+ nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
+ " for QP%u, SQ Head = %u, SQ Tail = %u. "
+ "cm_id = %p, refcount = %u.\n",
+ nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
+ nesqp->hwqp.sq_tail, cm_id,
+ atomic_read(&nesqp->refcount));
spin_unlock_irqrestore(&nesqp->lock, flags);
ret = cm_id->event_handler(cm_id, &cm_event);
if (ret)
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+ nes_debug(NES_DBG_CM, "OFA CM event_handler "
+ "returned, ret=%d\n", ret);
spin_lock_irqsave(&nesqp->lock, flags);
}
@@ -2248,31 +2449,24 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
if (nesqp->flush_issued == 0) {
nesqp->flush_issued = 1;
spin_unlock_irqrestore(&nesqp->lock, flags);
- flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1);
- } else {
+ flush_wqes(nesvnic->nesdev, nesqp,
+ NES_CQP_FLUSH_RQ, 1);
+ } else
spin_unlock_irqrestore(&nesqp->lock, flags);
- }
-
- /* This reference is from either ModifyQP or the AE processing,
- there is still a race here with modifyqp */
- nes_rem_ref(&nesqp->ibqp);
-
} else {
cm_id = nesqp->cm_id;
spin_unlock_irqrestore(&nesqp->lock, flags);
/* check to see if the inbound reset beat the outbound reset */
if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
- nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset"
- " beating the outbound reset.\n",
- nesqp->hwqp.qp_id);
- nes_rem_ref(&nesqp->ibqp);
+ nes_debug(NES_DBG_CM, "QP%u: Decing refcount "
+ "due to inbound reset beating the "
+ "outbound reset.\n", nesqp->hwqp.qp_id);
}
}
} else {
nesqp->disconn_pending = 0;
spin_unlock_irqrestore(&nesqp->lock, flags);
}
- nes_rem_ref(&nesqp->ibqp);
return 0;
}
@@ -2350,71 +2544,82 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nesdev = nesvnic->nesdev;
adapter = nesdev->nesadapter;
- nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
- nesvnic, nesvnic->netdev, nesvnic->netdev->name);
-
- /* since this is from a listen, we were able to put node handle into cm_id */
cm_node = (struct nes_cm_node *)cm_id->provider_data;
+ nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p,"
+ "%s\n", cm_node, nesvnic, nesvnic->netdev,
+ nesvnic->netdev->name);
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
+ cm_node->nesqp = nesqp;
+ nes_add_ref(&nesqp->ibqp);
- nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n",
- nesqp->hwqp.qp_id, cm_node, jiffies);
+ nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
+ nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
atomic_inc(&cm_accepts);
nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
atomic_read(&nesvnic->netdev->refcnt));
- /* allocate the ietf frame and space for private data */
- nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
- sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
- &nesqp->ietf_frame_pbase);
-
- if (!nesqp->ietf_frame) {
- nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
- return -ENOMEM;
- }
+ /* allocate the ietf frame and space for private data */
+ nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
+ sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
+ &nesqp->ietf_frame_pbase);
+ if (!nesqp->ietf_frame) {
+ nes_debug(NES_DBG_CM, "Unable to allocate memory for private "
+ "data\n");
+ return -ENOMEM;
+ }
- /* setup the MPA frame */
- nesqp->private_data_len = conn_param->private_data_len;
- memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
- memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
- conn_param->private_data_len);
+ /* setup the MPA frame */
+ nesqp->private_data_len = conn_param->private_data_len;
+ memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
- nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len);
- nesqp->ietf_frame->rev = mpa_version;
- nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
+ memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
+ conn_param->private_data_len);
- /* setup our first outgoing iWarp send WQE (the IETF frame response) */
- wqe = &nesqp->hwqp.sq_vbase[0];
+ nesqp->ietf_frame->priv_data_len =
+ cpu_to_be16(conn_param->private_data_len);
+ nesqp->ietf_frame->rev = mpa_version;
+ nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
- if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) {
- u64temp = (unsigned long)nesqp;
- u64temp |= NES_SW_CONTEXT_ALIGN>>1;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
- u64temp);
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
- cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame));
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)nesqp->ietf_frame_pbase);
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32));
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
- cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame));
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
-
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
- NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU);
- } else {
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM));
- }
- nesqp->skip_lsmm = 1;
+ /* setup our first outgoing iWarp send WQE (the IETF frame response) */
+ wqe = &nesqp->hwqp.sq_vbase[0];
+
+ if (cm_id->remote_addr.sin_addr.s_addr !=
+ cm_id->local_addr.sin_addr.s_addr) {
+ u64temp = (unsigned long)nesqp;
+ u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+ u64temp);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
+ NES_IWARP_SQ_WQE_WRPDU);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
+ cpu_to_le32(conn_param->private_data_len +
+ sizeof(struct ietf_mpa_frame));
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] =
+ cpu_to_le32((u32)nesqp->ietf_frame_pbase);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] =
+ cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32));
+ wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
+ cpu_to_le32(conn_param->private_data_len +
+ sizeof(struct ietf_mpa_frame));
+ wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU);
+ } else {
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU |
+ NES_QPCONTEXT_ORDIRD_ALSMM));
+ }
+ nesqp->skip_lsmm = 1;
/* Cache the cm_id in the qp */
@@ -2425,55 +2630,75 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_id->provider_data = nesqp;
nesqp->active_conn = 0;
+ if (cm_node->state == NES_CM_STATE_TSA)
+ nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
+ cm_node);
+
nes_cm_init_tsa_conn(nesqp, cm_node);
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(nesvnic->local_ipaddr));
+ else
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+ (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+ NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
- nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
- nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
+ nesqp->nesqp_context->arp_index_vlan |=
+ cpu_to_le32(nes_arp_table(nesdev,
+ le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
NES_ARP_RESOLVE) << 16);
nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
- jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
+ jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
- ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
+ ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((u32)conn_param->ord);
memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
- nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
- nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+ nes_quad.DstIpAdrIndex =
+ cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nes_quad.SrcIpadr = nesvnic->local_ipaddr;
+ else
+ nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
+ nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
+ nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
+ nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
nesqp->hte_index &= adapter->hte_index_mask;
nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
- nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X,"
- " rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n",
- nesqp->hwqp.qp_id,
+ nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
+ "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
+ "private data length=%zu.\n", nesqp->hwqp.qp_id,
ntohl(cm_id->remote_addr.sin_addr.s_addr),
ntohs(cm_id->remote_addr.sin_port),
ntohl(cm_id->local_addr.sin_addr.s_addr),
ntohs(cm_id->local_addr.sin_port),
le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
le32_to_cpu(nesqp->nesqp_context->snd_nxt),
- conn_param->private_data_len+sizeof(struct ietf_mpa_frame));
+ conn_param->private_data_len +
+ sizeof(struct ietf_mpa_frame));
attr.qp_state = IB_QPS_RTS;
nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
@@ -2490,15 +2715,16 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_event.private_data_len = 0;
ret = cm_id->event_handler(cm_id, &cm_event);
if (cm_node->loopbackpartner) {
- cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len;
+ cm_node->loopbackpartner->mpa_frame_size =
+ nesqp->private_data_len;
/* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data,
- nesqp->private_data_len);
+ memcpy(cm_node->loopbackpartner->mpa_frame_buf,
+ nesqp->ietf_frame->priv_data, nesqp->private_data_len);
create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
}
if (ret)
- printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
return 0;
}
@@ -2556,74 +2782,61 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!nesdev)
return -EINVAL;
- atomic_inc(&cm_connects);
-
- nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) +
- conn_param->private_data_len, GFP_KERNEL);
- if (!nesqp->ietf_frame)
- return -ENOMEM;
+ nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
+ "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
+ ntohl(nesvnic->local_ipaddr),
+ ntohl(cm_id->remote_addr.sin_addr.s_addr),
+ ntohs(cm_id->remote_addr.sin_port),
+ ntohl(cm_id->local_addr.sin_addr.s_addr),
+ ntohs(cm_id->local_addr.sin_port));
- /* set qp as having an active connection */
+ atomic_inc(&cm_connects);
nesqp->active_conn = 1;
- nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n",
- nesqp->hwqp.qp_id,
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port),
- ntohl(cm_id->local_addr.sin_addr.s_addr),
- ntohs(cm_id->local_addr.sin_port));
-
/* cache the cm_id in the qp */
nesqp->cm_id = cm_id;
cm_id->provider_data = nesqp;
- /* copy the private data */
- if (conn_param->private_data_len) {
- memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
- conn_param->private_data_len);
- }
-
nesqp->private_data_len = conn_param->private_data_len;
nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
- nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len);
-
- strcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ);
- nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
- nesqp->ietf_frame->rev = IETF_MPA_VERSION;
- nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len);
+ nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
+ conn_param->private_data_len);
- if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr)
+ if (cm_id->local_addr.sin_addr.s_addr !=
+ cm_id->remote_addr.sin_addr.s_addr)
nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
- PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+ PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
/* set up the connection params for the node */
- cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr);
- cm_info.loc_port = (cm_id->local_addr.sin_port);
- cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr);
- cm_info.rem_port = (cm_id->remote_addr.sin_port);
+ cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr);
+ cm_info.loc_port = htons(cm_id->local_addr.sin_port);
+ cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr);
+ cm_info.rem_port = htons(cm_id->remote_addr.sin_port);
cm_info.cm_id = cm_id;
cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
cm_id->add_ref(cm_id);
- nes_add_ref(&nesqp->ibqp);
/* create a connect CM node connection */
- cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info);
+ cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
+ conn_param->private_data_len, (void *)conn_param->private_data,
+ &cm_info);
if (!cm_node) {
- if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr)
+ if (cm_id->local_addr.sin_addr.s_addr !=
+ cm_id->remote_addr.sin_addr.s_addr)
nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
- PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
- nes_rem_ref(&nesqp->ibqp);
- kfree(nesqp->ietf_frame);
- nesqp->ietf_frame = NULL;
+ PCI_FUNC(nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
+
cm_id->rem_ref(cm_id);
return -ENOMEM;
}
cm_node->apbvt_set = 1;
nesqp->cm_node = cm_node;
+ cm_node->nesqp = nesqp;
return 0;
}
@@ -2665,7 +2878,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
if (!cm_node) {
- printk("%s[%u] Error returned from listen API call\n",
+ printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
__func__, __LINE__);
return -ENOMEM;
}
@@ -2673,10 +2886,13 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = cm_node;
if (!cm_node->reused_node) {
- err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
- PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+ err = nes_manage_apbvt(nesvnic,
+ ntohs(cm_id->local_addr.sin_port),
+ PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_ADD);
if (err) {
- printk("nes_manage_apbvt call returned %d.\n", err);
+ printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
+ err);
g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
return err;
}
@@ -2796,53 +3012,70 @@ static void cm_event_connected(struct nes_cm_event *event)
nes_cm_init_tsa_conn(nesqp, cm_node);
/* set the QP tsa context */
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(nesvnic->local_ipaddr));
+ else
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+ (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+ NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
- nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0),
+ nes_arp_table(nesdev,
+ le32_to_cpu(nesqp->nesqp_context->ip0),
NULL, NES_ARP_RESOLVE) << 16);
nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+ cpu_to_le32((u32)1 <<
+ NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
/* Adjust tail for not having a LSMM */
nesqp->hwqp.sq_tail = 1;
#if defined(NES_SEND_FIRST_WRITE)
- if (cm_node->send_write0) {
- nes_debug(NES_DBG_CM, "Sending first write.\n");
- wqe = &nesqp->hwqp.sq_vbase[0];
- u64temp = (unsigned long)nesqp;
- u64temp |= NES_SW_CONTEXT_ALIGN>>1;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
- u64temp);
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
-
- /* use the reserved spot on the WQ for the extra first WQE */
- nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM));
- nesqp->skip_lsmm = 1;
- nesqp->hwqp.sq_tail = 0;
- nes_write32(nesdev->regs + NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
- }
+ if (cm_node->send_write0) {
+ nes_debug(NES_DBG_CM, "Sending first write.\n");
+ wqe = &nesqp->hwqp.sq_vbase[0];
+ u64temp = (unsigned long)nesqp;
+ u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+
+ /* use the reserved spot on the WQ for the extra first WQE */
+ nesqp->nesqp_context->ird_ord_sizes &=
+ cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU |
+ NES_QPCONTEXT_ORDIRD_ALSMM));
+ nesqp->skip_lsmm = 1;
+ nesqp->hwqp.sq_tail = 0;
+ nes_write32(nesdev->regs + NES_WQE_ALLOC,
+ (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
+ }
#endif
memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
+ nes_quad.DstIpAdrIndex =
+ cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nes_quad.SrcIpadr = nesvnic->local_ipaddr;
+ else
+ nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
@@ -2859,10 +3092,6 @@ static void cm_event_connected(struct nes_cm_event *event)
nesqp->private_data_len = (u8) cm_node->mpa_frame_size;
cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
- /* modify QP state to rts */
- attr.qp_state = IB_QPS_RTS;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
-
/* notify OF layer we successfully created the requested connection */
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
@@ -2871,20 +3100,21 @@ static void cm_event_connected(struct nes_cm_event *event)
cm_event.local_addr.sin_port = cm_id->local_addr.sin_port;
cm_event.remote_addr = cm_id->remote_addr;
- cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
+ cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
+ cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
if (ret)
- printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
- nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n",
- nesqp->hwqp.qp_id, jiffies );
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
+ attr.qp_state = IB_QPS_RTS;
+ nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
- nes_rem_ref(&nesqp->ibqp);
+ nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
+ "%lu\n", nesqp->hwqp.qp_id, jiffies);
return;
}
@@ -2928,17 +3158,19 @@ static void cm_event_connect_error(struct nes_cm_event *event)
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
- nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n",
- cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr);
+ nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, "
+ "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
+ cm_event.remote_addr.sin_addr.s_addr);
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
if (ret)
- printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
nes_rem_ref(&nesqp->ibqp);
- cm_id->rem_ref(cm_id);
+ cm_id->rem_ref(cm_id);
+ rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
return;
}
@@ -3041,7 +3273,8 @@ static int nes_cm_post_event(struct nes_cm_event *event)
add_ref_cm_node(event->cm_node);
event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
INIT_WORK(&event->event_work, nes_cm_event_handler);
- nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event);
+ nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
+ event->cm_node, event);
queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
@@ -3057,46 +3290,48 @@ static int nes_cm_post_event(struct nes_cm_event *event)
*/
static void nes_cm_event_handler(struct work_struct *work)
{
- struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work);
+ struct nes_cm_event *event = container_of(work, struct nes_cm_event,
+ event_work);
struct nes_cm_core *cm_core;
- if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) {
+ if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
return;
- }
+
cm_core = event->cm_node->cm_core;
nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
- event, event->type, atomic_read(&cm_core->events_posted));
+ event, event->type, atomic_read(&cm_core->events_posted));
switch (event->type) {
- case NES_CM_EVENT_MPA_REQ:
- cm_event_mpa_req(event);
- nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n");
- break;
- case NES_CM_EVENT_RESET:
- nes_debug(NES_DBG_CM, "CM Event: RESET\n");
- cm_event_reset(event);
- break;
- case NES_CM_EVENT_CONNECTED:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state != NES_CM_STATE_TSA)) {
- break;
- }
- cm_event_connected(event);
- nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
+ case NES_CM_EVENT_MPA_REQ:
+ cm_event_mpa_req(event);
+ nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
+ event->cm_node);
+ break;
+ case NES_CM_EVENT_RESET:
+ nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
+ event->cm_node);
+ cm_event_reset(event);
+ break;
+ case NES_CM_EVENT_CONNECTED:
+ if ((!event->cm_node->cm_id) ||
+ (event->cm_node->state != NES_CM_STATE_TSA))
break;
- case NES_CM_EVENT_ABORTED:
- if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) {
- break;
- }
- cm_event_connect_error(event);
- nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
- break;
- case NES_CM_EVENT_DROPPED_PKT:
- nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
- break;
- default:
- nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
+ cm_event_connected(event);
+ nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
+ break;
+ case NES_CM_EVENT_ABORTED:
+ if ((!event->cm_node->cm_id) ||
+ (event->cm_node->state == NES_CM_STATE_TSA))
break;
+ cm_event_connect_error(event);
+ nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
+ break;
+ case NES_CM_EVENT_DROPPED_PKT:
+ nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
+ break;
+ default:
+ nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
+ break;
}
atomic_dec(&cm_core->events_posted);
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 7717cb2ab50..367b3d29014 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -83,6 +83,8 @@ enum nes_timer_type {
#define SET_FIN 4
#define SET_RST 8
+#define TCP_OPTIONS_PADDING 3
+
struct option_base {
u8 optionnum;
u8 length;
@@ -177,6 +179,7 @@ enum nes_cm_node_state {
NES_CM_STATE_ESTABLISHED,
NES_CM_STATE_ACCEPTING,
NES_CM_STATE_MPAREQ_SENT,
+ NES_CM_STATE_MPAREQ_RCVD,
NES_CM_STATE_TSA,
NES_CM_STATE_FIN_WAIT1,
NES_CM_STATE_FIN_WAIT2,
@@ -187,6 +190,16 @@ enum nes_cm_node_state {
NES_CM_STATE_CLOSED
};
+enum nes_tcpip_pkt_type { /* NOTE(review): presumably classifies TCP segments by control flag; the users of this enum are outside this hunk - confirm */
+ NES_PKT_TYPE_UNKNOWN,
+ NES_PKT_TYPE_SYN,
+ NES_PKT_TYPE_SYNACK,
+ NES_PKT_TYPE_ACK,
+ NES_PKT_TYPE_FIN,
+ NES_PKT_TYPE_RST
+};
+
+
/* type of nes connection */
enum nes_cm_conn_type {
NES_CM_IWARP_CONN_TYPE,
@@ -257,7 +270,9 @@ struct nes_cm_node {
struct net_device *netdev;
struct nes_cm_node *loopbackpartner;
- struct list_head retrans_list;
+
+ struct nes_timer_entry *send_entry;
+
spinlock_t retrans_list_lock;
struct list_head recv_list;
spinlock_t recv_list_lock;
@@ -276,6 +291,8 @@ struct nes_cm_node {
struct nes_vnic *nesvnic;
int apbvt_set;
int accept_pend;
+ int freed;
+ struct nes_qp *nesqp;
};
/* structure for client or CM to fill when making CM api calls. */
@@ -366,14 +383,14 @@ struct nes_cm_ops {
struct nes_cm_info *);
int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *);
struct nes_cm_node * (*connect)(struct nes_cm_core *,
- struct nes_vnic *, struct ietf_mpa_frame *,
+ struct nes_vnic *, u16, void *,
struct nes_cm_info *);
int (*close)(struct nes_cm_core *, struct nes_cm_node *);
int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
struct nes_cm_node *);
int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
struct nes_cm_node *);
- int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
+ void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
struct sk_buff *);
int (*destroy_cm_core)(struct nes_cm_core *);
int (*get)(struct nes_cm_core *);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 8dc70f9bad2..1513d4066f1 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -42,6 +42,10 @@
#include "nes.h"
+static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; /* module parameter (mode 0444), defaults to NES_LRO_MAX_AGGR; copied into lro_mgr.max_aggr by nes_init_nic_qp() */
+module_param(nes_lro_max_aggr, uint, 0444);
+MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
+
static u32 crit_err_count;
u32 int_mod_timer_init;
u32 int_mod_cq_depth_256;
@@ -394,7 +398,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->base_pd = 1;
nesadapter->device_cap_flags =
- IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW;
+ IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -1738,7 +1742,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
jumbomode = 1;
nes_nic_init_timer_defaults(nesdev, jumbomode);
}
- nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR;
+ nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr;
nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
@@ -2706,39 +2710,11 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
barrier();
cqp_request->request_done = 1;
wake_up(&cqp_request->waitq);
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
- cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
- } else if (cqp_request->callback) {
- /* Envoke the callback routine */
- cqp_request->cqp_callback(nesdev, cqp_request);
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
+ nes_put_cqp_request(nesdev, cqp_request);
} else {
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
- cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
+ if (cqp_request->callback)
+ cqp_request->cqp_callback(nesdev, cqp_request);
+ nes_free_cqp_request(nesdev, cqp_request);
}
} else {
wake_up(&nesdev->cqp.waitq);
@@ -2838,7 +2814,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp = *((struct nes_qp **)&context);
if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
nesqp->cm_id->add_ref(nesqp->cm_id);
- nes_add_ref(&nesqp->ibqp);
schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
NES_TIMER_TYPE_CLOSE, 1, 0);
nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
@@ -2862,7 +2837,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
tcp_state = NES_AEQE_TCP_STATE_CLOSED;
}
- nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = iwarp_state;
nesqp->hw_tcp_state = tcp_state;
@@ -2900,7 +2874,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
}
spin_unlock_irqrestore(&nesqp->lock, flags);
if (next_iwarp_state) {
- nes_add_ref(&nesqp->ibqp);
nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
" also added another reference\n",
nesqp->hwqp.qp_id, next_iwarp_state);
@@ -2912,7 +2885,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
/* FIN Received but ib state not RTS,
close complete will be on its way */
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_rem_ref(&nesqp->ibqp);
return;
}
spin_unlock_irqrestore(&nesqp->lock, flags);
@@ -2946,7 +2918,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
((nesqp->ibqp_state == IB_QPS_RTS)&&
(async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
} else {
nesqp->in_disconnect = 0;
@@ -2955,7 +2926,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
break;
case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
nesqp = *((struct nes_qp **)&context);
- nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
@@ -3066,7 +3036,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
}
/* tell cm to disconnect, cm will queue work to thread */
- nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
break;
case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
@@ -3086,7 +3055,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
}
/* tell cm to disconnect, cm will queue work to thread */
- nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
break;
case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
@@ -3106,7 +3074,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
}
/* tell cm to disconnect, cm will queue work to thread */
- nes_add_ref(&nesqp->ibqp);
nes_cm_disconn(nesqp);
break;
/* TODO: additional AEs need to be here */
@@ -3145,7 +3112,6 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
{
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_hw_cqp_wqe *cqp_wqe;
- unsigned long flags;
struct nes_cqp_request *cqp_request;
int ret = 0;
u16 major_code;
@@ -3172,7 +3138,7 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n");
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
if (add_port == NES_MANAGE_APBVT_ADD)
ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -3180,15 +3146,9 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n",
ret, cqp_request->major_code, cqp_request->minor_code);
major_code = cqp_request->major_code;
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
if (!ret)
return -ETIME;
else if (major_code)
@@ -3248,7 +3208,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
atomic_set(&cqp_request->refcount, 1);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
}
@@ -3258,7 +3218,6 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
u32 which_wq, u32 wait_completion)
{
- unsigned long flags;
struct nes_cqp_request *cqp_request;
struct nes_hw_cqp_wqe *cqp_wqe;
int ret;
@@ -3281,7 +3240,7 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
if (wait_completion) {
/* Wait for CQP */
@@ -3290,14 +3249,6 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u,"
" CQP Major:Minor codes = 0x%04X:0x%04X\n",
ret, cqp_request->major_code, cqp_request->minor_code);
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+ nes_put_cqp_request(nesdev, cqp_request);
}
}
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 745bf94f3f0..7b81e0ae007 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1172,7 +1172,7 @@ struct nes_vnic {
u32 mcrq_qp_id;
struct nes_ucontext *mcrq_ucontext;
struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev);
- void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int);
+ void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *);
int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr );
struct net_device_stats netstats;
/* used to put the netdev on the adapters logical port list */
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index fe83d1b2b17..fb8cbd71a2e 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -567,12 +567,36 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
return cqp_request;
}
+void nes_free_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request)
+{
+ unsigned long flags;
+ /* Releases cqp_request without looking at its refcount; the debug line prints the low 6 bits (0x3f) of the opcode word. */
+ nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
+ cqp_request,
+ le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
+ /* Requests flagged ->dynamic are kfree()d; all others are appended to nesdev->cqp_avail_reqs under cqp.lock. */
+ if (cqp_request->dynamic) {
+ kfree(cqp_request);
+ } else {
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+ list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+ }
+}
+/* Drop one reference on cqp_request; when the count reaches zero, hand it to nes_free_cqp_request(). */
+void nes_put_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request)
+{
+ if (atomic_dec_and_test(&cqp_request->refcount))
+ nes_free_cqp_request(nesdev, cqp_request);
+}
/**
* nes_post_cqp_request
*/
void nes_post_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request, int ring_doorbell)
+ struct nes_cqp_request *cqp_request)
{
struct nes_hw_cqp_wqe *cqp_wqe;
unsigned long flags;
@@ -600,10 +624,9 @@ void nes_post_cqp_request(struct nes_device *nesdev,
nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
cqp_request->waiting, atomic_read(&cqp_request->refcount));
barrier();
- if (ring_doorbell) {
- /* Ring doorbell (1 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
- }
+
+ /* Ring doorbell (1 WQEs) */
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
barrier();
} else {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 99b3c4ae86e..d79942e8497 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -55,7 +55,6 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
* nes_alloc_mw
*/
static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
- unsigned long flags;
struct nes_pd *nespd = to_nespd(ibpd);
struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
struct nes_device *nesdev = nesvnic->nesdev;
@@ -119,7 +118,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -128,15 +127,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
stag, ret, cqp_request->major_code, cqp_request->minor_code);
if ((!ret) || (cqp_request->major_code)) {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+ nes_put_cqp_request(nesdev, cqp_request);
kfree(nesmr);
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
if (!ret) {
@@ -144,17 +135,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
} else {
return ERR_PTR(-ENOMEM);
}
- } else {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
}
+ nes_put_cqp_request(nesdev, cqp_request);
nesmr->ibmw.rkey = stag;
nesmr->mode = IWNES_MEMREG_TYPE_MW;
@@ -178,7 +160,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
struct nes_hw_cqp_wqe *cqp_wqe;
struct nes_cqp_request *cqp_request;
int err = 0;
- unsigned long flags;
int ret;
/* Deallocate the window with the adapter */
@@ -194,7 +175,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n",
@@ -204,32 +185,12 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u,"
" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
ret, cqp_request->major_code, cqp_request->minor_code);
- if ((!ret) || (cqp_request->major_code)) {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
- if (!ret) {
- err = -ETIME;
- } else {
- err = -EIO;
- }
- } else {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
- }
+ if (!ret)
+ err = -ETIME;
+ else if (cqp_request->major_code)
+ err = -EIO;
+
+ nes_put_cqp_request(nesdev, cqp_request);
nes_free_resource(nesadapter, nesadapter->allocated_mrs,
(ibmw->rkey & 0x0fffff00) >> 8);
@@ -516,7 +477,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
(nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
@@ -526,29 +487,11 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
stag, ret, cqp_request->major_code, cqp_request->minor_code);
if ((!ret) || (cqp_request->major_code)) {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+ nes_put_cqp_request(nesdev, cqp_request);
ret = (!ret) ? -ETIME : -EIO;
goto failed_leaf_vpbl_pages_alloc;
- } else {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
}
-
+ nes_put_cqp_request(nesdev, cqp_request);
nesfmr->nesmr.ibfmr.lkey = stag;
nesfmr->nesmr.ibfmr.rkey = stag;
nesfmr->attr = *ibfmr_attr;
@@ -1474,7 +1417,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n",
@@ -1487,15 +1430,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail,
cqp_request->major_code, cqp_request->minor_code);
if ((!ret) || (cqp_request->major_code)) {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+ nes_put_cqp_request(nesdev, cqp_request);
nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
nes_free_qp_mem(nesdev, nesqp,virt_wqs);
kfree(nesqp->allocated_buffer);
@@ -1504,18 +1439,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
} else {
return ERR_PTR(-EIO);
}
- } else {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
}
+ nes_put_cqp_request(nesdev, cqp_request);
+
if (ibpd->uobject) {
uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
uresp.actual_sq_size = sq_size;
@@ -1817,7 +1744,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n",
@@ -1827,32 +1754,15 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n",
nescq->hw_cq.cq_number, ret);
if ((!ret) || (cqp_request->major_code)) {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+ nes_put_cqp_request(nesdev, cqp_request);
if (!context)
pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
nescq->hw_cq.cq_pbase);
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
kfree(nescq);
return ERR_PTR(-EIO);
- } else {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
}
+ nes_put_cqp_request(nesdev, cqp_request);
if (context) {
/* free the nespbl */
@@ -1931,7 +1841,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
(nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n",
@@ -1942,37 +1852,18 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
nescq->hw_cq.cq_number, ret, cqp_request->major_code,
cqp_request->minor_code);
- if ((!ret) || (cqp_request->major_code)) {
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
- if (!ret) {
- nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
+ if (!ret) {
+ nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
nescq->hw_cq.cq_number);
- ret = -ETIME;
- } else {
- nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
+ ret = -ETIME;
+ } else if (cqp_request->major_code) {
+ nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
nescq->hw_cq.cq_number);
- ret = -EIO;
- }
+ ret = -EIO;
} else {
ret = 0;
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
}
+ nes_put_cqp_request(nesdev, cqp_request);
if (nescq->cq_mem_size)
pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
@@ -2096,7 +1987,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
barrier();
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
@@ -2105,15 +1996,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
stag, ret, cqp_request->major_code, cqp_request->minor_code);
major_code = cqp_request->major_code;
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+ nes_put_cqp_request(nesdev, cqp_request);
+
if (!ret)
return -ETIME;
else if (major_code)
@@ -2456,10 +2340,8 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length)
goto enough_pages;
if ((page_count&0x01FF) == 0) {
- if (page_count>(1024*512)) {
+ if (page_count >= 1024 * 512) {
ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
nes_free_resource(nesadapter,
nesadapter->allocated_mrs, stag_index);
kfree(nesmr);
@@ -2756,7 +2638,7 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey);
@@ -2773,15 +2655,9 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
major_code = cqp_request->major_code;
minor_code = cqp_request->minor_code;
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
if (!ret) {
nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag,"
" ib_mr=%p, rkey = 0x%08X\n",
@@ -2906,7 +2782,6 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
/* struct iw_cm_id *cm_id = nesqp->cm_id; */
/* struct iw_cm_event cm_event; */
struct nes_cqp_request *cqp_request;
- unsigned long flags;
int ret;
u16 major_code;
@@ -2934,7 +2809,7 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL);
+ nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
if (wait_completion) {
@@ -2952,15 +2827,9 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
nesqp->hwqp.qp_id, cqp_request->major_code,
cqp_request->minor_code, next_iwarp_state);
}
- if (atomic_dec_and_test(&cqp_request->refcount)) {
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- }
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
if (!ret)
return -ETIME;
else if (major_code)
@@ -2998,7 +2867,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state,
nesqp->iwarp_state, atomic_read(&nesqp->refcount));
- nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, qplockflags);
nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X,"
@@ -3013,7 +2881,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
@@ -3024,7 +2891,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
@@ -3035,14 +2901,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
if (nesqp->cm_id == NULL) {
nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n",
nesqp->hwqp.qp_id );
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS;
@@ -3060,7 +2924,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail);
if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return 0;
} else {
if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
@@ -3068,7 +2931,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" ignored due to current iWARP state\n",
nesqp->hwqp.qp_id);
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) {
@@ -3100,7 +2962,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
/* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
@@ -3113,7 +2974,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
case IB_QPS_RESET:
if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
@@ -3139,7 +2999,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
break;
default:
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
break;
}
@@ -3219,7 +3078,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
/* this one is for the cm_disconnect thread */
- nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, qplockflags);
nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
@@ -3228,14 +3086,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
} else {
nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
- nes_rem_ref(&nesqp->ibqp);
}
} else {
spin_lock_irqsave(&nesqp->lock, qplockflags);
if (nesqp->cm_id) {
/* These two are for the timer thread */
if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
- nes_add_ref(&nesqp->ibqp);
nesqp->cm_id->add_ref(nesqp->cm_id);
nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
" need ae to finish up, original_last_aeq = 0x%04X."
@@ -3259,14 +3115,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
- nes_rem_ref(&nesqp->ibqp);
}
} else {
nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
- nes_rem_ref(&nesqp->ibqp);
}
err = 0;