diff options
Diffstat (limited to 'drivers/infiniband/hw')
84 files changed, 3344 insertions, 2399 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index 9a054c6941a..dd05c483564 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -454,9 +454,8 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev) (IB_DEVICE_RESIZE_MAX_WR | IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_ZERO_STAG | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_SEND_W_INV); + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW); /* Allocate the qptr_array */ c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *)); diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index ed2ee4ba4b7..f6d5747153a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -145,7 +145,9 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) } wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, + T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, + T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = qpid << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -276,7 +278,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->qpid) return -ENOMEM; - wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL); + wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL); if (!wq->rq) goto err1; @@ -300,6 +302,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!kernel_domain) wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + (wq->qpid << rdev_p->qpshift); + wq->rdev = rdev_p; PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, wq->qpid, wq->doorbell, (unsigned long long) wq->udb); return 0; @@ -359,9 +362,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) cq->sw_wptr++; } -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) { u32 ptr; + int flushed = 0; PDBG("%s wq %p cq %p\n", __func__, wq, cq); @@ -369,8 +373,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, wq->rq_rptr, wq->rq_wptr, count); ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) + while (ptr++ != wq->rq_wptr) { insert_recv_cqe(wq, cq); + flushed++; + } + return flushed; } static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, @@ -394,18 +401,21 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, cq->sw_wptr++; } -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) { __u32 ptr; + int flushed = 0; struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); ptr = wq->sq_rptr + count; - sqp += count; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); while (ptr != wq->sq_wptr) { insert_sq_cqe(wq, cq, sqp); - sqp++; ptr++; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + flushed++; } + return flushed; } /* @@ -551,7 +561,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7); + T3_CTL_QP_TID, 7, T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -581,7 +591,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) * caller aquires the ctrl_qp lock before the call */ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data, int completion) + u32 len, void *data) { u32 i, nr_wqe, copy_len; u8 *copy_data; @@ -617,7 +627,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, flag = 0; if (i == (nr_wqe - 1)) { /* last WQE */ - flag = completion ? T3_COMPLETION_FLAG : 0; + flag = T3_COMPLETION_FLAG; if (len % 32) utx_len = len / 32 + 1; else @@ -667,7 +677,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, Q_GENBIT(rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len); + wr_len, T3_SOPEOP); if (flag == T3_COMPLETION_FLAG) ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); len -= 96; @@ -676,21 +686,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, return 0; } -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size - * OUT: stag index, actual pbl_size, pbl_addr allocated. +/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr + * OUT: stag index * TBD: shared memory region support */ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, u32 *stag, u8 stag_state, u32 pdid, enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl, - u32 *pbl_size, u32 *pbl_addr) + u32 zbva, u64 to, u32 len, u8 page_size, + u32 pbl_size, u32 pbl_addr) { int err; struct tpt_entry tpt; u32 stag_idx; u32 wptr; - int rereg = (*stag != T3_STAG_UNSET); stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -704,30 +713,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", __func__, stag_state, type, pdid, stag_idx); - if (reset_tpt_entry) - cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3); - else if (!rereg) { - *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3); - if (!*pbl_addr) { - return -ENOMEM; - } - } - mutex_lock(&rdev_p->ctrl_qp.lock); - /* write PBL first if any - update pbl only if pbl list exist */ - if (pbl) { - - PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, *pbl_addr, rdev_p->rnic_info.pbl_base, - *pbl_size); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - (*pbl_addr >> 5), - (*pbl_size << 3), pbl, 0); - if (err) - goto ret; - } - /* write TPT entry */ if (reset_tpt_entry) memset(&tpt, 0, sizeof(tpt)); @@ -742,23 +729,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | V_TPT_PAGE_SIZE(page_size)); tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3)); + cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); tpt.len = cpu_to_be32(len); tpt.va_hi = cpu_to_be32((u32) (to >> 32)); tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); tpt.rsvd_bind_cnt_or_pstag = 0; tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2)); + cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); } err = cxio_hal_ctrl_qp_write_mem(rdev_p, stag_idx + (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt, 1); + sizeof(tpt), &tpt); /* release the stag index to free pool */ if (reset_tpt_entry) cxio_hal_put_stag(rdev_p->rscp, stag_idx); -ret: + wptr = rdev_p->ctrl_qp.wptr; mutex_unlock(&rdev_p->ctrl_qp.lock); if (!err) @@ -769,44 +756,74 @@ ret: return err; } +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size) +{ + u32 wptr; + int err; + + PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + __func__, pbl_addr, rdev_p->rnic_info.pbl_base, + pbl_size); + + mutex_lock(&rdev_p->ctrl_qp.lock); + err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, + pbl); + wptr = rdev_p->ctrl_qp.wptr; + mutex_unlock(&rdev_p->ctrl_qp.lock); + if (err) + return err; + + if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, + SEQ32_GE(rdev_p->ctrl_qp.rptr, + wptr))) + return -ERESTARTSYS; + + return 0; +} + int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr) + u8 page_size, u32 pbl_size, u32 pbl_addr) { *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl, pbl_size, pbl_addr); + zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr) + u8 page_size, u32 pbl_size, u32 pbl_addr) { return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl, pbl_size, pbl_addr); + zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, u32 pbl_addr) { - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, - &pbl_size, &pbl_addr); + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + pbl_size, pbl_addr); } int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) { - u32 pbl_size = 0; *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - NULL, &pbl_size, NULL); + 0, 0); } int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) { - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, - NULL, NULL); + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + 0, 0); +} + +int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, + 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); } int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) @@ -1250,13 +1267,16 @@ proc_cqe: wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); PDBG("%s completing sq idx %ld\n", __func__, Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = (wq->sq + - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id; + *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; wq->sq_rptr++; } else { PDBG("%s completing rq idx %ld\n", __func__, Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); + *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; + if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) + cxio_hal_pblpool_free(wq->rdev, + wq->rq[Q_PTR2IDX(wq->rq_rptr, + wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); wq->rq_rptr++; } diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 2bcff7f5046..656fe47bc84 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -45,15 +45,17 @@ #define T3_CTRL_QP_SIZE_LOG2 8 #define T3_CTRL_CQ_ID 0 -/* TBD */ #define T3_MAX_NUM_RI (1<<15) #define T3_MAX_NUM_QP (1<<15) #define T3_MAX_NUM_CQ (1<<15) #define T3_MAX_NUM_PD (1<<15) #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 +#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) +#define T3_MAX_CQ_DEPTH 8192 #define T3_MAX_NUM_STAG (1<<15) #define T3_MAX_MR_SIZE 0x100000000ULL +#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ #define T3_STAG_UNSET 0xffffffff @@ -154,17 +156,18 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, struct cxio_ucontext *uctx); int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size); int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr); + u8 page_size, u32 pbl_size, u32 pbl_addr); int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr); + u8 page_size, u32 pbl_size, u32 pbl_addr); int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, u32 pbl_addr); int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); +int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); @@ -173,8 +176,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); int __init cxio_hal_init(void); void __exit cxio_hal_exit(void); -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_flush_hw_cq(struct t3_cq *cq); diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index 45ed4f25ef7..bd233c08765 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -250,7 +250,6 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) */ #define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ -#define PBL_CHUNK 2*1024*1024 u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) { @@ -267,14 +266,35 @@ void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) { - unsigned long i; + unsigned pbl_start, pbl_chunk; + rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (rdev_p->pbl_pool) - for (i = rdev_p->rnic_info.pbl_base; - i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; - i += PBL_CHUNK) - gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); - return rdev_p->pbl_pool ? 0 : -ENOMEM; + if (!rdev_p->pbl_pool) + return -ENOMEM; + + pbl_start = rdev_p->rnic_info.pbl_base; + pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; + + while (pbl_start < rdev_p->rnic_info.pbl_top) { + pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, + pbl_chunk); + if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { + PDBG("%s failed to add PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { + printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n", + __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start); + return 0; + } + pbl_chunk >>= 1; + } else { + PDBG("%s added PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + pbl_start += pbl_chunk; + } + } + + return 0; } void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index f1a25a821a4..04618f7bfbb 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -39,6 +39,9 @@ #define T3_MAX_SGE 4 #define T3_MAX_INLINE 64 +#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) +#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) +#define T3_STAG0_PAGE_SHIFT 15 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ @@ -72,7 +75,8 @@ enum t3_wr_opcode { T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, - T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP + T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, + T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR } __attribute__ ((packed)); enum t3_rdma_opcode { @@ -89,7 +93,8 @@ enum t3_rdma_opcode { T3_FAST_REGISTER, T3_LOCAL_INV, T3_QP_MOD, - T3_BYPASS + T3_BYPASS, + T3_RDMA_READ_REQ_WITH_INV, } __attribute__ ((packed)); static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) @@ -103,6 +108,7 @@ static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) case T3_WR_BIND: return T3_BIND_MW; case T3_WR_INIT: return T3_RDMA_INIT; case T3_WR_QP_MOD: return T3_QP_MOD; + case T3_WR_FASTREG: return T3_FAST_REGISTER; default: break; } return -1; @@ -170,11 +176,54 @@ struct t3_send_wr { struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ }; +#define T3_MAX_FASTREG_DEPTH 24 +#define T3_MAX_FASTREG_FRAG 10 + +struct t3_fastreg_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 stag; /* 2 */ + __be32 len; + __be32 va_base_hi; /* 3 */ + __be32 va_base_lo_fbo; + __be32 page_type_perms; /* 4 */ + __be32 reserved1; + __be64 pbl_addrs[0]; /* 5+ */ +}; + +/* + * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. + */ +struct t3_pbl_frag { + struct fw_riwrh wrh; /* 0 */ + __be64 pbl_addrs[14]; /* 1..14 */ +}; + +#define S_FR_PAGE_COUNT 24 +#define M_FR_PAGE_COUNT 0xff +#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) +#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) + +#define S_FR_PAGE_SIZE 16 +#define M_FR_PAGE_SIZE 0x1f +#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) +#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) + +#define S_FR_TYPE 8 +#define M_FR_TYPE 0x1 +#define V_FR_TYPE(x) ((x) << S_FR_TYPE) +#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) + +#define S_FR_PERMS 0 +#define M_FR_PERMS 0xff +#define V_FR_PERMS(x) ((x) << S_FR_PERMS) +#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) + struct t3_local_inv_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ __be32 stag; /* 2 */ - __be32 reserved3; + __be32 reserved; }; struct t3_rdma_write_wr { @@ -193,7 +242,8 @@ struct t3_rdma_read_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ u8 rdmaop; /* 2 */ - u8 reserved[3]; + u8 local_inv; + u8 reserved[2]; __be32 rem_stag; __be64 rem_to; /* 3 */ __be32 local_stag; /* 4 */ @@ -201,18 +251,6 @@ struct t3_rdma_read_wr { __be64 local_to; /* 5 */ }; -enum t3_addr_type { - T3_VA_BASED_TO = 0x0, - T3_ZERO_BASED_TO = 0x1 -} __attribute__ ((packed)); - -enum t3_mem_perms { - T3_MEM_ACCESS_LOCAL_READ = 0x1, - T3_MEM_ACCESS_LOCAL_WRITE = 0x2, - T3_MEM_ACCESS_REM_READ = 0x4, - T3_MEM_ACCESS_REM_WRITE = 0x8 -} __attribute__ ((packed)); - struct t3_bind_mw_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ @@ -336,6 +374,11 @@ struct t3_genbit { __be64 genbit; }; +struct t3_wq_in_err { + u64 flit[13]; + u64 err; +}; + enum rdma_init_wr_flags { MPA_INITIATOR = (1<<0), PRIV_QP = (1<<1), @@ -346,13 +389,16 @@ union t3_wr { struct t3_rdma_write_wr write; struct t3_rdma_read_wr read; struct t3_receive_wr recv; + struct t3_fastreg_wr fastreg; + struct t3_pbl_frag pbl_frag; struct t3_local_inv_wr local_inv; struct t3_bind_mw_wr bind; struct t3_bypass_wr bypass; struct t3_rdma_init_wr init; struct t3_modify_qp_wr qp_mod; struct t3_genbit genbit; - u64 flit[16]; + struct t3_wq_in_err wq_in_err; + __be64 flit[16]; }; #define T3_SQ_CQE_FLIT 13 @@ -366,12 +412,18 @@ static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); } +enum t3_wr_hdr_bits { + T3_EOP = 1, + T3_SOP = 2, + T3_SOPEOP = T3_EOP|T3_SOP, +}; + static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len) + u8 len, u8 sopeop) { wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) | + V_FW_RIWR_SOPEOP(sopeop) | V_FW_RIWR_FLAGS(flags)); wmb(); wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | @@ -404,6 +456,7 @@ enum tpt_addr_type { }; enum tpt_mem_perm { + TPT_MW_BIND = 0x10, TPT_LOCAL_READ = 0x8, TPT_LOCAL_WRITE = 0x4, TPT_REMOTE_READ = 0x2, @@ -615,6 +668,11 @@ struct t3_swsq { int signaled; }; +struct t3_swrq { + __u64 wr_id; + __u32 pbl_addr; +}; + /* * A T3 WQ implements both the SQ and RQ. */ @@ -631,14 +689,15 @@ struct t3_wq { u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ u32 sq_rptr; /* pending wrs */ u32 sq_size_log2; /* sq size */ - u64 *rq; /* SW RQ (holds consumer wr_ids */ + struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ u32 rq_rptr; /* pending wrs */ - u64 *rq_oldest_wr; /* oldest wr on the SW RQ */ + struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ u32 rq_size_log2; /* rq size */ u32 rq_addr; /* rq adapter address */ void __iomem *doorbell; /* kernel db */ u64 udb; /* user db if any */ + struct cxio_rdev *rdev; }; struct t3_cq { @@ -659,7 +718,7 @@ struct t3_cq { static inline void cxio_set_wq_in_error(struct t3_wq *wq) { - wq->queue->flit[13] = 1; + wq->queue->wq_in_err.err = 1; } static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 71554eacb13..4489c89d671 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -71,18 +71,16 @@ static void rnic_init(struct iwch_dev *rnicp) idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); - rnicp->attr.vendor_id = 0x168; - rnicp->attr.vendor_part_id = 7; rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = (1UL << 24) - 1; + rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; rnicp->attr.max_sge_per_wr = T3_MAX_SGE; rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1; + rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ + rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; rnicp->attr.can_resize_wq = 0; rnicp->attr.max_rdma_reads_per_qp = 8; diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index d2409a505e8..3773453b2cf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -48,8 +48,6 @@ struct iwch_qp; struct iwch_mr; struct iwch_rnic_attributes { - u32 vendor_id; - u32 vendor_part_id; u32 max_qps; u32 max_wrs; /* Max for any SQ/RQ */ u32 max_sge_per_wr; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index d44a6df9ad8..c325c44807e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -67,10 +67,10 @@ int peer2peer = 0; module_param(peer2peer, int, 0644); MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); -static int ep_timeout_secs = 10; +static int ep_timeout_secs = 60; module_param(ep_timeout_secs, int, 0644); MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=10)"); + "in seconds (default=60)"); static int mpa_rev = 1; module_param(mpa_rev, int, 0644); @@ -1650,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) release = 1; break; case ABORTING: - break; case DEAD: + break; default: BUG_ON(1); break; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 4ee8ccd0a9e..cf5474ae68f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -81,6 +81,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, wc->wr_id = cookie; wc->qp = &qhp->ibqp; wc->vendor_err = CQE_STATUS(cqe); + wc->wc_flags = 0; PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " "lo 0x%x cookie 0x%llx\n", __func__, @@ -94,6 +95,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, else wc->byte_len = 0; wc->opcode = IB_WC_RECV; + if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || + CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { + wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } } else { switch (CQE_OPCODE(cqe)) { case T3_RDMA_WRITE: @@ -105,17 +111,20 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, break; case T3_SEND: case T3_SEND_WITH_SE: + case T3_SEND_WITH_INV: + case T3_SEND_WITH_SE_INV: wc->opcode = IB_WC_SEND; break; case T3_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; - /* these aren't supported yet */ - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: case T3_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; case T3_FAST_REGISTER: + wc->opcode = IB_WC_FAST_REG_MR; + break; default: printk(KERN_ERR MOD "Unexpected opcode %d " "in the CQE received for QPID=0x%0x\n", diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 58c3d61bcd1..ec49a5cbdeb 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -35,17 +35,26 @@ #include <rdma/ib_verbs.h> #include "cxio_hal.h" +#include "cxio_resource.h" #include "iwch.h" #include "iwch_provider.h" -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - __be64 *page_list) +static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) { - u32 stag; u32 mmid; + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); + PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); +} + +int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, int shift) +{ + u32 stag; if (cxio_register_phys_mem(&rhp->rdev, &stag, mhp->attr.pdid, @@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, - shift-12, - page_list, - &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); - PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + + iwch_finish_mem_reg(mhp, stag); + return 0; } int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift, - __be64 *page_list, int npages) { u32 stag; - u32 mmid; - /* We could support this... */ if (npages > mhp->attr.pbl_size) @@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, - shift-12, - page_list, - &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); - PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + + iwch_finish_mem_reg(mhp, stag); + + return 0; +} + +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) +{ + mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, + npages << 3); + + if (!mhp->attr.pbl_addr) + return -ENOMEM; + + mhp->attr.pbl_size = npages; + return 0; } +void iwch_free_pbl(struct iwch_mr *mhp) +{ + cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +} + +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) +{ + return cxio_write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (offset << 3), npages); +} + int build_phys_page_list(struct ib_phys_buf *buffer_list, int num_phys_buf, u64 *iova_start, diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index d07d3a377b5..b89640aa6e1 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -56,6 +56,7 @@ #include "iwch_provider.h" #include "iwch_cm.h" #include "iwch_user.h" +#include "common.h" static int iwch_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, @@ -442,6 +443,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) mmid = mhp->attr.stag >> 8; cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); + iwch_free_pbl(mhp); remove_handle(rhp, &rhp->mmidr, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); @@ -475,6 +477,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->rhp = rhp; + /* First check that we have enough alignment */ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { ret = -EINVAL; @@ -492,7 +496,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, if (ret) goto err; - mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, npages); + if (ret) { + kfree(page_list); + goto err_pbl; + } + + ret = iwch_write_pbl(mhp, page_list, npages, 0); + kfree(page_list); + if (ret) + goto err_pbl; + mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; @@ -502,12 +516,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, mhp->attr.len = (u32) total_size; mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift, page_list); - kfree(page_list); - if (ret) { - goto err; - } + ret = iwch_register_mem(rhp, php, mhp, shift); + if (ret) + goto err_pbl; + return &mhp->ibmr; + +err_pbl: + iwch_free_pbl(mhp); + err: kfree(mhp); return ERR_PTR(ret); @@ -560,7 +577,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, return ret; } - ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); + ret = iwch_reregister_mem(rhp, php, &mh, shift, npages); kfree(page_list); if (ret) { return ret; @@ -602,6 +619,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->rhp = rhp; + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); @@ -615,10 +634,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, list_for_each_entry(chunk, &mhp->umem->chunk_list, list) n += chunk->nents; - pages = kmalloc(n * sizeof(u64), GFP_KERNEL); + err = iwch_alloc_pbl(mhp, n); + if (err) + goto err; + + pages = (__be64 *) __get_free_page(GFP_KERNEL); if (!pages) { err = -ENOMEM; - goto err; + goto err_pbl; } i = n = 0; @@ -630,25 +653,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, pages[i++] = cpu_to_be64(sg_dma_address( &chunk->page_list[j]) + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = iwch_write_pbl(mhp, pages, i, n); + if (err) + goto pbl_done; + n += i; + i = 0; + } } } - mhp->rhp = rhp; + if (i) + err = iwch_write_pbl(mhp, pages, i, n); + +pbl_done: + free_page((unsigned long) pages); + if (err) + goto err_pbl; + mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.va_fbo = virt; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) length; - mhp->attr.pbl_size = i; - err = iwch_register_mem(rhp, php, mhp, shift, pages); - kfree(pages); + + err = iwch_register_mem(rhp, php, mhp, shift); if (err) - goto err; + goto err_pbl; if (udata && !t3a_device(rhp)) { uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; + rhp->rdev.rnic_info.pbl_base) >> 3; PDBG("%s user resp pbl_addr 0x%x\n", __func__, uresp.pbl_addr); @@ -661,6 +697,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mhp->ibmr; +err_pbl: + iwch_free_pbl(mhp); + err: ib_umem_release(mhp->umem); kfree(mhp); @@ -709,6 +748,7 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) mhp->attr.type = TPT_MW; mhp->attr.stag = stag; mmid = (stag) >> 8; + mhp->ibmw.rkey = stag; insert_handle(rhp, &rhp->mmidr, mhp, mmid); PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmw); @@ -730,6 +770,68 @@ static int iwch_dealloc_mw(struct ib_mw *mw) return 0; } +static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret; + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, pbl_depth); + if (ret) { + kfree(mhp); + return ERR_PTR(ret); + } + mhp->attr.pbl_size = pbl_depth; + ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) { + iwch_free_pbl(mhp); + kfree(mhp); + return ERR_PTR(ret); + } + mhp->attr.pdid = php->pdid; + mhp->attr.type = TPT_NON_SHARED_MR; + mhp->attr.stag = stag; + mhp->attr.state = 1; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + insert_handle(rhp, &rhp->mmidr, mhp, mmid); + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmr); +} + +static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( + struct ib_device *device, + int page_list_len) +{ + struct ib_fast_reg_page_list *page_list; + + page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), + GFP_KERNEL); + if (!page_list) + return ERR_PTR(-ENOMEM); + + page_list->page_list = (u64 *)(page_list + 1); + page_list->max_page_list_len = page_list_len; + + return page_list; +} + +static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) +{ + kfree(page_list); +} + static int iwch_destroy_qp(struct ib_qp *ib_qp) { struct iwch_dev *rhp; @@ -805,6 +907,15 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, */ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); wqsize = roundup_pow_of_two(rqsize + sqsize); + + /* + * Kernel users need more wq space for fastreg WRs which can take + * 2 WR fragments. + */ + ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + if (!ucontext && wqsize < (rqsize + (2 * sqsize))) + wqsize = roundup_pow_of_two(rqsize + + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__, wqsize, sqsize, rqsize); qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); @@ -813,7 +924,6 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->wq.size_log2 = ilog2(wqsize); qhp->wq.rq_size_log2 = ilog2(rqsize); qhp->wq.sq_size_log2 = ilog2(sqsize); - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { kfree(qhp); @@ -897,10 +1007,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->ibqp.qp_num = qhp->wq.qpid; init_timer(&(qhp->timer)); PDBG("%s sq_num_entries %d, rq_num_entries %d " - "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n", + "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, - 1 << qhp->wq.size_log2); + 1 << qhp->wq.size_log2, qhp->wq.rq_addr); return &qhp->ibqp; } @@ -985,6 +1095,29 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port, return 0; } +static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min, fw_mic; + + rtnl_lock(); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); + + next = info.fw_version + 1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_mic); + + return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | + (fw_mic & 0xffff); +} + static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -995,7 +1128,10 @@ static int iwch_query_device(struct ib_device *ibdev, dev = to_iwch_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); + props->hw_ver = dev->rdev.t3cdev_p->type; + props->fw_ver = fw_vers_string_to_u64(dev); props->device_cap_flags = dev->device_cap_flags; + props->page_size_cap = dev->attr.mem_pgsizes_bitmask; props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; props->max_mr_size = dev->attr.max_mr_size; @@ -1010,6 +1146,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->max_mr = dev->attr.max_mem_regs; props->max_pd = dev->attr.max_pds; props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; return 0; } @@ -1050,6 +1187,28 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } +static int fw_supports_fastreg(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min; + + rtnl_lock(); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); + + next = info.fw_version+1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + + PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min); + + return fw_maj > 6 || (fw_maj == 6 && fw_min > 0); +} + static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) { struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, @@ -1058,7 +1217,9 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, ch struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; PDBG("%s dev 0x%p\n", __func__, dev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.fw_version); } @@ -1071,7 +1232,9 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr, struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; PDBG("%s dev 0x%p\n", __func__, dev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.driver); } @@ -1085,6 +1248,61 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, iwch_dev->rdev.rnic_info.pdev->device); } +static int iwch_get_mib(struct ib_device *ibdev, + union rdma_protocol_stats *stats) +{ + struct iwch_dev *dev; + struct tp_mib_stats m; + int ret; + + PDBG("%s ibdev %p\n", __func__, ibdev); + dev = to_iwch_dev(ibdev); + ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); + if (ret) + return -ENOSYS; + + memset(stats, 0, sizeof *stats); + stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + + m.ipInReceive_lo; + stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + + m.ipInHdrErrors_lo; + stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + + m.ipInAddrErrors_lo; + stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + + m.ipInUnknownProtos_lo; + stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + + m.ipInDiscards_lo; + stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + + m.ipInDelivers_lo; + stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + + m.ipOutRequests_lo; + stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + + m.ipOutDiscards_lo; + stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + + m.ipOutNoRoutes_lo; + stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; + stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; + stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; + stats->iw.ipReasmFails = (u64) m.ipReasmFails; + stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; + stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; + stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; + stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; + stats->iw.tcpOutRsts = (u64) m.tcpOutRsts; + stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab; + stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) + + m.tcpInSegs_lo; + stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) + + m.tcpOutSegs_lo; + stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) + + m.tcpRetransSeg_lo; + stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) + + m.tcpInErrs_lo; + stats->iw.tcpRtoMin = (u64) m.tcpRtoMin; + stats->iw.tcpRtoMax = (u64) m.tcpRtoMax; + return 0; +} + static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); @@ -1094,7 +1312,7 @@ static struct device_attribute *iwch_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, - &dev_attr_board_id + &dev_attr_board_id, }; int iwch_register_device(struct iwch_dev *dev) @@ -1107,8 +1325,12 @@ int iwch_register_device(struct iwch_dev *dev) memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); dev->ibdev.owner = THIS_MODULE; - dev->device_cap_flags = - (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW); + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + + /* cxgb3 supports STag 0. */ + dev->ibdev.local_dma_lkey = 0; + if (fw_supports_fastreg(dev)) + dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1160,15 +1382,16 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.alloc_mw = iwch_alloc_mw; dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; - + dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr; + dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; + dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; - dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; - + dev->ibdev.get_protocol_stats = iwch_get_mib; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index db5100d27ca..f5ceca05c43 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -296,14 +296,6 @@ static inline u32 iwch_ib_to_tpt_access(int acc) TPT_LOCAL_READ; } -static inline u32 iwch_ib_to_mwbind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) | - T3_MEM_ACCESS_LOCAL_READ; -} - enum iwch_mmid_state { IWCH_STAG_STATE_VALID, IWCH_STAG_STATE_INVALID @@ -340,14 +332,14 @@ int iwch_quiesce_qps(struct iwch_cq *chp); int iwch_resume_qps(struct iwch_cq *chp); void stop_read_rep_timer(struct iwch_qp *qhp); int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - __be64 *page_list); + struct iwch_mr *mhp, int shift); int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift, - __be64 *page_list, int npages); +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); +void iwch_free_pbl(struct iwch_mr *mhp); +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); int build_phys_page_list(struct ib_phys_buf *buffer_list, int num_phys_buf, u64 *iova_start, diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 9b4be889c58..9a3be3a9d5d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -33,10 +33,11 @@ #include "iwch.h" #include "iwch_cm.h" #include "cxio_hal.h" +#include "cxio_resource.h" #define NO_SUPPORT -1 -static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, u8 * flit_cnt) { int i; @@ -44,59 +45,44 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.rdmaop = T3_SEND_WITH_SE; else wqe->send.rdmaop = T3_SEND; wqe->send.rem_stag = 0; break; -#if 0 /* Not currently supported */ - case TYPE_SEND_INVALIDATE: - case TYPE_SEND_INVALIDATE_IMMEDIATE: - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); - break; - case TYPE_SEND_SE_INVALIDATE: - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.rdmaop = T3_SEND_WITH_SE_INV; + else + wqe->send.rdmaop = T3_SEND_WITH_INV; + wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); break; -#endif default: - break; + return -EINVAL; } if (wr->num_sge > T3_MAX_SGE) return -EINVAL; wqe->send.reserved[0] = 0; wqe->send.reserved[1] = 0; wqe->send.reserved[2] = 0; - if (wr->opcode == IB_WR_SEND_WITH_IMM) { - plen = 4; - wqe->send.sgl[0].stag = wr->ex.imm_data; - wqe->send.sgl[0].len = __constant_cpu_to_be32(0); - wqe->send.num_sgle = __constant_cpu_to_be32(0); - *flit_cnt = 5; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) + return -EMSGSIZE; + + plen += wr->sg_list[i].length; + wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); + wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); } + wqe->send.num_sgle = cpu_to_be32(wr->num_sge); + *flit_cnt = 4 + ((wr->num_sge) << 1); wqe->send.plen = cpu_to_be32(plen); return 0; } -static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { int i; @@ -137,15 +123,18 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { if (wr->num_sge > 1) return -EINVAL; wqe->read.rdmaop = T3_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + wqe->read.local_inv = 1; + else + wqe->read.local_inv = 0; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.reserved[2] = 0; wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); @@ -155,6 +144,57 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } +static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +{ + int i; + __be64 *p; + + if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + return -EINVAL; + *wr_cnt = 1; + wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); + wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); + wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fastreg.va_base_lo_fbo = + cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + wqe->fastreg.page_type_perms = cpu_to_be32( + V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | + V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_TYPE(TPT_VATO) | + V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + p = &wqe->fastreg.pbl_addrs[0]; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + + /* If we need a 2nd WR, then set it up */ + if (i == T3_MAX_FASTREG_FRAG) { + *wr_cnt = 2; + wqe = (union t3_wr *)(wq->queue + + Q_PTR2IDX((wq->wptr+1), wq->size_log2)); + build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, + Q_GENBIT(wq->wptr + 1, wq->size_log2), + 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + T3_EOP); + + p = &wqe->pbl_frag.pbl_addrs[0]; + } + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + } + *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + if (*flit_cnt > 15) + *flit_cnt = 15; + return 0; +} + +static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->local_inv.reserved = 0; + *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; + return 0; +} + /* * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. */ @@ -205,23 +245,106 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, return 0; } -static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, +static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, struct ib_recv_wr *wr) { - int i; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; + int i, err = 0; + u32 pbl_addr[T3_MAX_SGE]; + u8 page_size[T3_MAX_SGE]; + + err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, + page_size); + if (err) + return err; + wqe->recv.pagesz[0] = page_size[0]; + wqe->recv.pagesz[1] = page_size[1]; + wqe->recv.pagesz[2] = page_size[2]; + wqe->recv.pagesz[3] = page_size[3]; wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + + /* to in the WQE == the offset into the page */ + wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % + (1UL << (12 + page_size[i]))); + + /* pbl_addr is the adapters address in the PBL */ + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); + } + for (; i < T3_MAX_SGE; i++) { + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = 0; + wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; + } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = 0; + return 0; +} + +static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, + struct ib_recv_wr *wr) +{ + int i; + u32 pbl_addr; + u32 pbl_offset; + + + /* + * The T3 HW requires the PBL in the HW recv descriptor to reference + * a PBL entry. So we allocate the max needed PBL memory here and pass + * it to the uP in the recv WR. The uP will build the PBL and setup + * the HW recv descriptor. + */ + pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); + if (!pbl_addr) + return -ENOMEM; + + /* + * Compute the 8B aligned offset. + */ + pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; + + wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); + + for (i = 0; i < wr->num_sge; i++) { + + /* + * Use a 128MB page size. This and an imposed 128MB + * sge length limit allows us to require only a 2-entry HW + * PBL for each SGE. This restriction is acceptable since + * since it is not possible to allocate 128MB of contiguous + * DMA coherent memory! + */ + if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) + return -EINVAL; + wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; + + /* + * T3 restricts a recv to all zero-stag or all non-zero-stag. + */ + if (wr->sg_list[i].lkey != 0) + return -EINVAL; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); + pbl_offset += 2; } for (; i < T3_MAX_SGE; i++) { + wqe->recv.pagesz[i] = 0; wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; return 0; } @@ -229,7 +352,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; - u8 t3_wr_flit_cnt; + u8 uninitialized_var(t3_wr_flit_cnt); enum t3_wr_opcode t3_wr_opcode = 0; enum t3_wr_flags t3_wr_flags; struct iwch_qp *qhp; @@ -238,6 +361,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u32 num_wrs; unsigned long flag; struct t3_swsq *sqp; + int wr_cnt = 1; qhp = to_iwch_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); @@ -262,33 +386,45 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t3_wr_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; if (wr->send_flags & IB_SEND_SIGNALED) t3_wr_flags |= T3_COMPLETION_FLAG; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_READ_FENCE_FLAG; t3_wr_opcode = T3_WR_SEND; - err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: t3_wr_opcode = T3_WR_WRITE; - err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: t3_wr_opcode = T3_WR_READ; t3_wr_flags = 0; /* T3 reads are always signaled */ - err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); if (err) break; sqp->read_len = wqe->read.local_len; if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; + case IB_WR_FAST_REG_MR: + t3_wr_opcode = T3_WR_FASTREG; + err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); + break; + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_LOCAL_FENCE_FLAG; + t3_wr_opcode = T3_WR_INV_STAG; + err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); + break; default: PDBG("%s post of type=%d TBD!\n", __func__, wr->opcode); @@ -307,14 +443,15 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt); + 0, t3_wr_flit_cnt, + (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", __func__, (unsigned long long) wr->wr_id, idx, Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), sqp->opcode); wr = wr->next; num_wrs--; - ++(qhp->wq.wptr); + qhp->wq.wptr += wr_cnt; ++(qhp->wq.sq_wptr); } spin_unlock_irqrestore(&qhp->lock, flag); @@ -345,21 +482,27 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EINVAL; } while (wr) { + if (wr->num_sge > T3_MAX_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); wqe = (union t3_wr *) (qhp->wq.queue + idx); if (num_wrs) - err = iwch_build_rdma_recv(qhp->rhp, wqe, wr); + if (wr->sg_list[0].lkey) + err = build_rdma_recv(qhp, wqe, wr); + else + err = build_zero_stag_recv(qhp, wqe, wr); else err = -ENOMEM; if (err) { *bad_wr = wr; break; } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] = - wr->wr_id; build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3); + 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " "wqe %p \n", __func__, (unsigned long long) wr->wr_id, idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); @@ -419,10 +562,10 @@ int iwch_bind_mw(struct ib_qp *qp, sgl.lkey = mw_bind->mr->lkey; sgl.length = mw_bind->length; wqe->bind.reserved = 0; - wqe->bind.type = T3_VA_BASED_TO; + wqe->bind.type = TPT_VATO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); + wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags); wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); wqe->bind.mw_len = cpu_to_be32(mw_bind->length); @@ -430,7 +573,7 @@ int iwch_bind_mw(struct ib_qp *qp, err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { spin_unlock_irqrestore(&qhp->lock, flag); - return err; + return err; } wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); @@ -441,10 +584,9 @@ int iwch_bind_mw(struct ib_qp *qp, sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr); wqe->bind.mr_pagesz = page_size; - wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id; build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, - sizeof(struct t3_bind_mw_wr) >> 3); + sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP); ++(qhp->wq.wptr); ++(qhp->wq.sq_wptr); spin_unlock_irqrestore(&qhp->lock, flag); @@ -655,6 +797,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) { struct iwch_cq *rchp, *schp; int count; + int flushed; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); @@ -669,20 +812,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) spin_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - cxio_flush_rq(&qhp->wq, &rchp->cq, count); + flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, *flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + if (flushed) + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); /* locking heirarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, *flag); spin_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); cxio_count_scqes(&schp->cq, &qhp->wq, &count); - cxio_flush_sq(&qhp->wq, &schp->cq, count); + flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, *flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + if (flushed) + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) @@ -755,7 +900,8 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.rqe_count = iwch_rqes_posted(qhp); init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; - init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; + if (!qhp->ibqp.uobject) + init_attr.flags |= PRIV_QP; if (peer2peer) { init_attr.rtr_type = RTR_READ; if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) @@ -880,7 +1026,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, ep = qhp->ep; get_ep(&ep->com); } - flush_qp(qhp, &flag); break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; @@ -911,6 +1056,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, } switch (attrs->next_state) { case IWCH_QP_STATE_IDLE: + flush_qp(qhp, &flag); qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.llp_stream_handle = NULL; put_ep(&qhp->ep->com); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 00bab60f6de..0b0618edd64 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -192,6 +192,9 @@ struct ehca_qp { int mtu_shift; u32 message_count; u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; + int mig_armed; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 2515cbde7e6..46288220cfb 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) props->max_ee = limit_uint(rblock->max_rd_ee_context); props->max_rdd = limit_uint(rblock->max_rd_domain); props->max_fmr = limit_uint(rblock->max_mr); - props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay); props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); @@ -115,7 +114,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) } props->max_pkeys = 16; - props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay); + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); @@ -136,7 +137,7 @@ query_device1: return ret; } -static int map_mtu(struct ehca_shca *shca, u32 fw_mtu) +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) { switch (fw_mtu) { case 0x1: @@ -156,7 +157,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu) } } -static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) { switch (vl_cap) { case 0x1: diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index ca5eb0cb628..cb55be04442 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -178,6 +178,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, { struct ib_event event; + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + event.device = &shca->ib_device; event.event = event_type; @@ -204,6 +208,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); read_unlock(&ehca_qp_idr_lock); if (!qp) @@ -223,6 +229,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe, if (fatal && qp->ext_type == EQPT_SRQBASE) dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); return; } @@ -527,7 +535,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) { struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value; + u64 eqe_value, ret; unsigned long flags; int eqe_cnt, i; int eq_empty = 0; @@ -579,8 +587,13 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) ehca_dbg(&shca->ib_device, "No eqe found for irq event"); goto unlock_irq_spinlock; - } else if (!is_irq) + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %ld\n", ret); ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); /* enable irq for new packets */ @@ -637,8 +650,8 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = next_cpu(pool->last_cpu, cpu_online_map); - if (cpu == NR_CPUS) + cpu = next_cpu_nr(pool->last_cpu, cpu_online_map); + if (cpu >= nr_cpu_ids) cpu = first_cpu(cpu_online_map); pool->last_cpu = cpu; spin_unlock_irqrestore(&pool->last_cpu_lock, flags); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 482103eb6ea..598844d2edc 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -923,6 +923,7 @@ static struct of_device_id ehca_device_table[] = }, {}, }; +MODULE_DEVICE_TABLE(of, ehca_device_table); static struct of_platform_driver ehca_driver = { .name = "ehca", diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 18fba92fa7a..ea13efddf17 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -566,6 +566,8 @@ static struct ehca_qp *internal_create_qp( return ERR_PTR(-ENOMEM); } + atomic_set(&my_qp->nr_events, 0); + init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); my_qp->qp_type = qp_type; @@ -1458,6 +1460,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); } @@ -1934,6 +1938,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li " diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index bbe0436f4f7..dd9bc68f1c7 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -421,8 +421,10 @@ int ehca_post_send(struct ib_qp *qp, int ret = 0; unsigned long flags; - if (unlikely(my_qp->state != IB_QPS_RTS)) { - ehca_err(qp->device, "QP not in RTS state qpn=%x", qp->qp_num); + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); return -EINVAL; } @@ -542,8 +544,16 @@ int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), - qp->device, recv_wr, bad_recv_wr); + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); } int ehca_post_srq_recv(struct ib_srq *srq, @@ -679,7 +689,7 @@ poll_cq_one_read_cqe: wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; wc->wc_flags = cqe->w_completion_flags; - wc->imm_data = cpu_to_be32(cqe->immediate_data); + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; poll_cq_one_exit0: diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 5245e13c3a3..415d3a465de 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -933,3 +933,13 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, r_cb, 0, 0, 0, 0); } + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 60ce02b7066..2c3c6e0ea5c 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -260,5 +260,6 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, const u64 ressource_handle, void *rblock, unsigned long *byte_count); +u64 hipz_h_eoi(int irq); #endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 661f8db6270..c3a32846543 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -163,6 +163,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) out: ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); return 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index a03bd28d9b4..d385e4168c9 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) wc->uqueue[head].opcode = entry->opcode; wc->uqueue[head].vendor_err = entry->vendor_err; wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data; + wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data; wc->uqueue[head].qp_num = entry->qp->qp_num; wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index acf30c06a0c..daad09a4591 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd) } reloop: - for (last = 0, i = 1; !last; i++) { + for (last = 0, i = 1; !last; i += !last) { hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); eflags = ipath_hdrget_err_flags(rhf_addr); etype = ipath_hdrget_rcv_type(rhf_addr); @@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd) spin_unlock_irqrestore(&ipath_pioavail_lock, flags); } +/* + * used to force update of pioavailshadow if we can't get a pio buffer. + * Needed primarily due to exitting freeze mode after recovering + * from errors. Done lazily, because it's safer (known to not + * be writing pio buffers). + */ +static void ipath_reset_availshadow(struct ipath_devdata *dd) +{ + int i, im; + unsigned long flags; + + spin_lock_irqsave(&ipath_pioavail_lock, flags); + for (i = 0; i < dd->ipath_pioavregs; i++) { + u64 val, oldval; + /* deal with 6110 chip bug on high register #s */ + im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? + i ^ 1 : i; + val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]); + /* + * busy out the buffers not in the kernel avail list, + * without changing the generation bits. + */ + oldval = dd->ipath_pioavailshadow[i]; + dd->ipath_pioavailshadow[i] = val | + ((~dd->ipath_pioavailkernel[i] << + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) & + 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ + if (oldval != dd->ipath_pioavailshadow[i]) + ipath_dbg("shadow[%d] was %Lx, now %lx\n", + i, oldval, dd->ipath_pioavailshadow[i]); + } + spin_unlock_irqrestore(&ipath_pioavail_lock, flags); +} + /** * ipath_setrcvhdrsize - set the receive header size * @dd: the infinipath device @@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd) */ ipath_stats.sps_nopiobufs++; if (!(++dd->ipath_consec_nopiobuf % 100000)) { - ipath_dbg("%u pio sends with no bufavail; dmacopy: " - "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n", + ipath_force_pio_avail_update(dd); /* at start */ + ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: " + "%llx %llx %llx %llx\n" + "ipath shadow: %lx %lx %lx %lx\n", dd->ipath_consec_nopiobuf, + (unsigned long)get_cycles(), (unsigned long long) le64_to_cpu(dma[0]), (unsigned long long) le64_to_cpu(dma[1]), (unsigned long long) le64_to_cpu(dma[2]), @@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd) */ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > (sizeof(shadow[0]) * 4 * 4)) - ipath_dbg("2nd group: dmacopy: %llx %llx " - "%llx %llx; shadow: %lx %lx %lx %lx\n", + ipath_dbg("2nd group: dmacopy: " + "%llx %llx %llx %llx\n" + "ipath shadow: %lx %lx %lx %lx\n", (unsigned long long)le64_to_cpu(dma[4]), (unsigned long long)le64_to_cpu(dma[5]), (unsigned long long)le64_to_cpu(dma[6]), (unsigned long long)le64_to_cpu(dma[7]), - shadow[4], shadow[5], shadow[6], - shadow[7]); + shadow[4], shadow[5], shadow[6], shadow[7]); + + /* at end, so update likely happened */ + ipath_reset_availshadow(dd); } } @@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, unsigned len, int avail) { unsigned long flags; - unsigned end; + unsigned end, cnt = 0, next; /* There are two bits per send buffer (busy and generation) */ start *= 2; - len *= 2; - end = start + len; + end = start + len * 2; - /* Set or clear the generation bits. */ spin_lock_irqsave(&ipath_pioavail_lock, flags); + /* Set or clear the busy bit in the shadow. */ while (start < end) { if (avail) { - __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, - dd->ipath_pioavailshadow); + unsigned long dma; + int i, im; + /* + * the BUSY bit will never be set, because we disarm + * the user buffers before we hand them back to the + * kernel. We do have to make sure the generation + * bit is set correctly in shadow, since it could + * have changed many times while allocated to user. + * We can't use the bitmap functions on the full + * dma array because it is always little-endian, so + * we have to flip to host-order first. + * BITS_PER_LONG is slightly wrong, since it's + * always 64 bits per register in chip... + * We only work on 64 bit kernels, so that's OK. + */ + /* deal with 6110 chip bug on high register #s */ + i = start / BITS_PER_LONG; + im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? + i ^ 1 : i; + __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT + + start, dd->ipath_pioavailshadow); + dma = (unsigned long) le64_to_cpu( + dd->ipath_pioavailregs_dma[im]); + if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start) % BITS_PER_LONG, &dma)) + __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->ipath_pioavailshadow); + else + __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->ipath_pioavailshadow); __set_bit(start, dd->ipath_pioavailkernel); } else { __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, @@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, } start += 2; } + + if (dd->ipath_pioupd_thresh) { + end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + next = find_first_bit(dd->ipath_pioavailkernel, end); + while (next < end) { + cnt++; + next = find_next_bit(dd->ipath_pioavailkernel, end, + next + 1); + } + } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); + + /* + * When moving buffers from kernel to user, if number assigned to + * the user is less than the pio update threshold, and threshold + * is supported (cnt was computed > 0), drop the update threshold + * so we update at least once per allocated number of buffers. + * In any case, if the kernel buffers are less than the threshold, + * drop the threshold. We don't bother increasing it, having once + * decreased it, since it would typically just cycle back and forth. + * If we don't decrease below buffers in use, we can wait a long + * time for an update, until some other context uses PIO buffers. + */ + if (!avail && len < cnt) + cnt = len; + if (cnt < dd->ipath_pioupd_thresh) { + dd->ipath_pioupd_thresh = cnt; + ipath_dbg("Decreased pio update threshold to %u\n", + dd->ipath_pioupd_thresh); + spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); + dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK + << INFINIPATH_S_UPDTHRESH_SHIFT); + dd->ipath_sendctrl |= dd->ipath_pioupd_thresh + << INFINIPATH_S_UPDTHRESH_SHIFT; + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + } } /** @@ -1790,12 +1894,12 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) */ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { int skip_cancel; - u64 *statp = &dd->ipath_sdma_status; + unsigned long *statp = &dd->ipath_sdma_status; spin_lock_irqsave(&dd->ipath_sdma_lock, flags); skip_cancel = - !test_bit(IPATH_SDMA_DISABLED, statp) && - test_and_set_bit(IPATH_SDMA_ABORTING, statp); + test_and_set_bit(IPATH_SDMA_ABORTING, statp) + && !test_bit(IPATH_SDMA_DISABLED, statp); spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); if (skip_cancel) goto bail; @@ -1826,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) ipath_disarm_piobufs(dd, 0, dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); + if (dd->ipath_flags & IPATH_HAS_SEND_DMA) + set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); + if (restore_sendctrl) { /* else done by caller later if needed */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -1845,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) /* only wait so long for intr */ dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; dd->ipath_sdma_reset_wait = 200; - __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) tasklet_hi_schedule(&dd->ipath_sdma_abort_task); spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); @@ -2510,7 +2616,7 @@ int ipath_reset_device(int unit) ipath_dbg("unit %u port %d is in use " "(PID %u cmd %s), can't reset\n", unit, i, - dd->ipath_pd[i]->port_pid, + pid_nr(dd->ipath_pd[i]->port_pid), dd->ipath_pd[i]->port_comm); ret = -EBUSY; goto bail; @@ -2548,19 +2654,21 @@ bail: static int ipath_signal_procs(struct ipath_devdata *dd, int sig) { int i, sub, any = 0; - pid_t pid; + struct pid *pid; if (!dd->ipath_pd) return 0; for (i = 1; i < dd->ipath_cfgports; i++) { - if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || - !dd->ipath_pd[i]->port_pid) + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) continue; pid = dd->ipath_pd[i]->port_pid; + if (!pid) + continue; + dev_info(&dd->pcidev->dev, "context %d in use " "(PID %u), sending signal %d\n", - i, pid, sig); - kill_proc(pid, sig, 1); + i, pid_nr(pid), sig); + kill_pid(pid, sig, 1); any++; for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { pid = dd->ipath_pd[i]->port_subpid[sub]; @@ -2568,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) continue; dev_info(&dd->pcidev->dev, "sub-context " "%d:%d in use (PID %u), sending " - "signal %d\n", i, sub, pid, sig); - kill_proc(pid, sig, 1); + "signal %d\n", i, sub, pid_nr(pid), sig); + kill_pid(pid, sig, 1); any++; } } diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 8b1752202e7..56c0eda3c07 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -39,6 +39,7 @@ #include <linux/highmem.h> #include <linux/io.h> #include <linux/jiffies.h> +#include <linux/smp_lock.h> #include <asm/pgtable.h> #include "ipath_kernel.h" @@ -173,47 +174,25 @@ static int ipath_get_base_info(struct file *fp, (void *) dd->ipath_statusp - (void *) dd->ipath_pioavailregs_dma; if (!shared) { - kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_piocnt = pd->port_piocnt; kinfo->spi_piobufbase = (u64) pd->port_piobufs; kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; } else if (master) { - kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + - (dd->ipath_pbufsport % subport_cnt); + kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) + + (pd->port_piocnt % subport_cnt); /* Master's PIO buffers are after all the slave's */ kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * - (dd->ipath_pbufsport - kinfo->spi_piocnt); + (pd->port_piocnt - kinfo->spi_piocnt); } else { unsigned slave = subport_fp(fp) - 1; - kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; + kinfo->spi_piocnt = pd->port_piocnt / subport_cnt; kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * kinfo->spi_piocnt * slave; } - /* - * Set the PIO avail update threshold to no larger - * than the number of buffers per process. Note that - * we decrease it here, but won't ever increase it. - */ - if (dd->ipath_pioupd_thresh && - kinfo->spi_piocnt < dd->ipath_pioupd_thresh) { - unsigned long flags; - - dd->ipath_pioupd_thresh = kinfo->spi_piocnt; - ipath_dbg("Decreased pio update threshold to %u\n", - dd->ipath_pioupd_thresh); - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK - << INFINIPATH_S_UPDTHRESH_SHIFT); - dd->ipath_sendctrl |= dd->ipath_pioupd_thresh - << INFINIPATH_S_UPDTHRESH_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - dd->ipath_sendctrl); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - } - if (shared) { kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; @@ -577,7 +556,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, p = dd->ipath_pageshadow[porttid + tid]; dd->ipath_pageshadow[porttid + tid] = NULL; ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", - pd->port_pid, tid); + pid_nr(pd->port_pid), tid); dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, dd->ipath_tidinvalid); @@ -1309,19 +1288,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; if (!pd->port_subport_cnt) { /* port is not shared */ - piocnt = dd->ipath_pbufsport; + piocnt = pd->port_piocnt; piobufs = pd->port_piobufs; } else if (!subport_fp(fp)) { /* caller is the master */ - piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + - (dd->ipath_pbufsport % pd->port_subport_cnt); + piocnt = (pd->port_piocnt / pd->port_subport_cnt) + + (pd->port_piocnt % pd->port_subport_cnt); piobufs = pd->port_piobufs + - dd->ipath_palign * (dd->ipath_pbufsport - piocnt); + dd->ipath_palign * (pd->port_piocnt - piocnt); } else { unsigned slave = subport_fp(fp) - 1; /* caller is a slave */ - piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; + piocnt = pd->port_piocnt / pd->port_subport_cnt; piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } @@ -1631,11 +1610,8 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, port); pd->port_cnt = 1; port_fp(fp) = pd; - pd->port_pid = current->pid; + pd->port_pid = get_pid(task_pid(current)); strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); - ipath_chg_pioavailkernel(dd, - dd->ipath_pbufsport * (pd->port_port - 1), - dd->ipath_pbufsport, 0); ipath_stats.sps_ports++; ret = 0; } else @@ -1818,14 +1794,15 @@ static int find_shared_port(struct file *fp, } port_fp(fp) = pd; subport_fp(fp) = pd->port_cnt++; - pd->port_subpid[subport_fp(fp)] = current->pid; + pd->port_subpid[subport_fp(fp)] = + get_pid(task_pid(current)); tidcursor_fp(fp) = 0; pd->active_slaves |= 1 << subport_fp(fp); ipath_cdbg(PROC, "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", current->comm, current->pid, subport_fp(fp), - pd->port_comm, pd->port_pid, + pd->port_comm, pid_nr(pd->port_pid), dd->ipath_unit, pd->port_port); ret = 1; goto done; @@ -1839,6 +1816,7 @@ done: static int ipath_open(struct inode *in, struct file *fp) { /* The real work is performed later in ipath_assign_port() */ + cycle_kernel_lock(); fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); return fp->private_data ? 0 : -ENOMEM; } @@ -1938,11 +1916,25 @@ static int ipath_do_user_init(struct file *fp, /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ + /* some ports may get extra buffers, calculate that here */ + if (pd->port_port <= dd->ipath_ports_extrabuf) + pd->port_piocnt = dd->ipath_pbufsport + 1; + else + pd->port_piocnt = dd->ipath_pbufsport; + /* for right now, kernel piobufs are at end, so port 1 is at 0 */ + if (pd->port_port <= dd->ipath_ports_extrabuf) + pd->port_pio_base = (dd->ipath_pbufsport + 1) + * (pd->port_port - 1); + else + pd->port_pio_base = dd->ipath_ports_extrabuf + + dd->ipath_pbufsport * (pd->port_port - 1); pd->port_piobufs = dd->ipath_piobufbase + - dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; - ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", - pd->port_port, pd->port_piobufs); + pd->port_pio_base * dd->ipath_palign; + ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u," + " first pio %u\n", pd->port_port, pd->port_piobufs, + pd->port_piocnt, pd->port_pio_base); + ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0); /* * Now allocate the rcvhdr Q and eager TIDs; skip the TID @@ -2077,7 +2069,8 @@ static int ipath_close(struct inode *in, struct file *fp) * the slave(s) don't wait for receive data forever. */ pd->active_slaves &= ~(1 << fd->subport); - pd->port_subpid[fd->subport] = 0; + put_pid(pd->port_subpid[fd->subport]); + pd->port_subpid[fd->subport] = NULL; mutex_unlock(&ipath_mutex); goto bail; } @@ -2085,7 +2078,7 @@ static int ipath_close(struct inode *in, struct file *fp) if (pd->port_hdrqfull) { ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " - "during run\n", pd->port_comm, pd->port_pid, + "during run\n", pd->port_comm, pid_nr(pd->port_pid), pd->port_hdrqfull); pd->port_hdrqfull = 0; } @@ -2107,7 +2100,6 @@ static int ipath_close(struct inode *in, struct file *fp) } if (dd->ipath_kregbase) { - int i; /* atomically clear receive enable port and intr avail. */ clear_bit(dd->ipath_r_portenable_shift + port, &dd->ipath_rcvctrl); @@ -2136,9 +2128,9 @@ static int ipath_close(struct inode *in, struct file *fp) ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, pd->port_port, dd->ipath_dummy_hdrq_phys); - i = dd->ipath_pbufsport * (port - 1); - ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); - ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1); + ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt); + ipath_chg_pioavailkernel(dd, pd->port_pio_base, + pd->port_piocnt, 1); dd->ipath_f_clear_tids(dd, pd->port_port); @@ -2146,11 +2138,12 @@ static int ipath_close(struct inode *in, struct file *fp) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pd->port_pid, + pd->port_comm, pid_nr(pd->port_pid), dd->ipath_unit, port); } - pd->port_pid = 0; + put_pid(pd->port_pid); + pd->port_pid = NULL; dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ @@ -2462,7 +2455,7 @@ static int init_cdev(int minor, char *name, const struct file_operations *fops, goto err_cdev; } - device = device_create(ipath_class, NULL, dev, name); + device = device_create_drvdata(ipath_class, NULL, dev, NULL, name); if (IS_ERR(device)) { ret = PTR_ERR(device); diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c index e3ec0d1bdf5..fb70712ac85 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c @@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd) dev_info(&dd->pcidev->dev, "Recovering from TXE PIO parity error\n"); - ipath_disarm_senderrbufs(dd, 1); + ipath_disarm_senderrbufs(dd); } @@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg, ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { /* - * Parity errors in send memory are recoverable, - * just cancel the send (if indicated in * sendbuffererror), - * count the occurrence, unfreeze (if no other handled - * hardware error bits are set), and continue. + * Parity errors in send memory are recoverable by h/w + * just do housekeeping, exit freeze mode and continue. */ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) @@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg, hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { - /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - goto bail; - } } if (hwerrs) { /* @@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg, *dd->ipath_statusp |= IPATH_STATUS_HWERROR; dd->ipath_flags &= ~IPATH_INITTED; } else { - ipath_dbg("Clearing freezemode on ignored hardware " - "error\n"); + ipath_dbg("Clearing freezemode on ignored or " + "recovered hardware error\n"); ipath_clear_freeze(dd); } } @@ -870,8 +861,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name, "revision %u.%u!\n", dd->ipath_majrev, dd->ipath_minrev); ret = 1; - } else if (dd->ipath_minrev == 1) { - /* Rev1 chips are prototype. Complain, but allow use */ + } else if (dd->ipath_minrev == 1 && + !(dd->ipath_flags & IPATH_INITTED)) { + /* Rev1 chips are prototype. Complain at init, but allow use */ ipath_dev_err(dd, "Unsupported hardware " "revision %u.%u, Contact support@qlogic.com\n", dd->ipath_majrev, dd->ipath_minrev); @@ -1966,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports) dd->ipath_rcvctrl); dd->ipath_p0_rcvegrcnt = 2048; /* always */ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - dd->ipath_pioreserved = 1; /* reserve a buffer */ + dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */ } @@ -2236,8 +2228,8 @@ static void ipath_autoneg_send(struct ipath_devdata *dd, int which) 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40000001, 0x1388, 0x15e, /* rest 0's */ }; - dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]); - hcnt = sizeof(hdr)/sizeof(hdr[0]); + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); if (!swapped) { /* for maintainability, do it at runtime */ for (i = 0; i < hcnt; i++) { diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 27dd8947666..3e5baa43fc8 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -41,7 +41,7 @@ /* * min buffers we want to have per port, after driver */ -#define IPATH_MIN_USER_PORT_BUFCNT 8 +#define IPATH_MIN_USER_PORT_BUFCNT 7 /* * Number of ports we are configured to use (to allow for more pio @@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); /* * Number of buffers reserved for driver (verbs and layered drivers.) - * Reserved at end of buffer list. Initialized based on - * number of PIO buffers if not set via module interface. + * Initialized based on number of PIO buffers if not set via module interface. * The problem with this is that it's global, but we'll use different - * numbers for different chip types. So the default value is not - * very useful. I've redefined it for the 1.3 release so that it's - * zero unless set by the user to something else, in which case we - * try to respect it. + * numbers for different chip types. */ static ushort ipath_kpiobufs; @@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit) pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; else pioavail = dd->ipath_pioavailregs_dma[i]; - dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) | - (~dd->ipath_pioavailkernel[i] << - INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT); + /* + * don't need to worry about ipath_pioavailkernel here + * because we will call ipath_chg_pioavailkernel() later + * in initialization, to busy out buffers as needed + */ + dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); } /* can get counters, stats, etc. */ dd->ipath_flags |= IPATH_PRESENT; @@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque) int ipath_init_chip(struct ipath_devdata *dd, int reinit) { int ret = 0; - u32 val32, kpiobufs; + u32 kpiobufs, defkbufs; u32 piobufs, uports; u64 val; struct ipath_portdata *pd; gfp_t gfp_flags = GFP_USER | __GFP_COMP; - unsigned long flags; ret = init_housekeeping(dd, reinit); if (ret) @@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; - if (ipath_kpiobufs == 0) { - /* not set by user (this is default) */ - if (piobufs > 144) - kpiobufs = 32; - else - kpiobufs = 16; - } + if (piobufs > 144) + defkbufs = 32 + dd->ipath_pioreserved; else - kpiobufs = ipath_kpiobufs; + defkbufs = 16 + dd->ipath_pioreserved; - if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { + if (ipath_kpiobufs && (ipath_kpiobufs + + (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) { int i = (int) piobufs - (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); if (i < 1) i = 1; dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " "%d for kernel leaves too few for %d user ports " - "(%d each); using %u\n", kpiobufs, + "(%d each); using %u\n", ipath_kpiobufs, piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); /* * shouldn't change ipath_kpiobufs, because could be * different for different devices... */ kpiobufs = i; - } + } else if (ipath_kpiobufs) + kpiobufs = ipath_kpiobufs; + else + kpiobufs = defkbufs; dd->ipath_lastport_piobuf = piobufs - kpiobufs; dd->ipath_pbufsport = uports ? dd->ipath_lastport_piobuf / uports : 0; - val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); - if (val32 > 0) { - ipath_dbg("allocating %u pbufs/port leaves %u unused, " - "add to kernel\n", dd->ipath_pbufsport, val32); - dd->ipath_lastport_piobuf -= val32; - kpiobufs += val32; - ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n", - dd->ipath_pbufsport, val32); - } + /* if not an even divisor, some user ports get extra buffers */ + dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf - + (dd->ipath_pbufsport * uports); + if (dd->ipath_ports_extrabuf) + ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to " + "ports <= %u\n", dd->ipath_pbufsport, + dd->ipath_ports_extrabuf); dd->ipath_lastpioindex = 0; dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; - ipath_chg_pioavailkernel(dd, 0, piobufs, 1); + /* ipath_pioavailshadow initialized earlier */ ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " "each for %u user ports\n", kpiobufs, piobufs, dd->ipath_pbufsport, uports); - if (dd->ipath_pioupd_thresh) { - if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh) - dd->ipath_pioupd_thresh = dd->ipath_pbufsport; - if (kpiobufs < dd->ipath_pioupd_thresh) - dd->ipath_pioupd_thresh = kpiobufs; - } - ret = dd->ipath_f_early_init(dd); if (ret) { ipath_dev_err(dd, "Early initialization failure\n"); @@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) } /* - * Cancel any possible active sends from early driver load. - * Follows early_init because some chips have to initialize - * PIO buffers in early_init to avoid false parity errors. - */ - ipath_cancel_sends(dd, 0); - - /* * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be * done after early_init. */ @@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, dd->ipath_pioavailregs_phys); + /* * this is to detect s/w errors, which the h/w works around by * ignoring the low 6 bits of address, if it wasn't aligned. @@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); - spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); - dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - /* * before error clears, since we expect serdes pll errors during * this, the first time after reset @@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) else enable_chip(dd, reinit); + /* after enable_chip, so pioavailshadow setup */ + ipath_chg_pioavailkernel(dd, 0, piobufs, 1); + + /* + * Cancel any possible active sends from early driver load. + * Follows early_init because some chips have to initialize + * PIO buffers in early_init to avoid false parity errors. + * After enable and ipath_chg_pioavailkernel so we can safely + * enable pioavail updates and PIOENABLE; packets are now + * ready to go out. + */ + ipath_cancel_sends(dd, 1); + if (!reinit) { /* * Used when we close a port, for DMA already in flight diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 1b58f4737c7..26900b3b7a4 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -38,42 +38,12 @@ #include "ipath_verbs.h" #include "ipath_common.h" -/* - * clear (write) a pio buffer, to clear a parity error. This routine - * should only be called when in freeze mode, and the buffer should be - * canceled afterwards. - */ -static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) -{ - u32 __iomem *pbuf; - u32 dwcnt; /* dword count to write */ - if (pnum < dd->ipath_piobcnt2k) { - pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum * - dd->ipath_palign); - dwcnt = dd->ipath_piosize2k >> 2; - } - else { - pbuf = (u32 __iomem *) (dd->ipath_pio4kbase + - (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign); - dwcnt = dd->ipath_piosize4k >> 2; - } - dev_info(&dd->pcidev->dev, - "Rewrite PIO buffer %u, to recover from parity error\n", - pnum); - - /* no flush required, since already in freeze */ - writel(dwcnt + 1, pbuf); - while (--dwcnt) - writel(0, pbuf++); -} /* * Called when we might have an error that is specific to a particular * PIO buffer, and may need to cancel that buffer, so it can be re-used. - * If rewrite is true, and bits are set in the sendbufferror registers, - * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) +void ipath_disarm_senderrbufs(struct ipath_devdata *dd) { u32 piobcnt; unsigned long sbuf[4]; @@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) } for (i = 0; i < piobcnt; i++) - if (test_bit(i, sbuf)) { - if (rewrite) - ipath_clrpiobuf(dd, i); + if (test_bit(i, sbuf)) ipath_disarm_piobufs(dd, i, 1); - } /* ignore armlaunch errs for a bit */ dd->ipath_lastcancel = jiffies+3; } @@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { u64 ignore_this_time = 0; - ipath_disarm_senderrbufs(dd, 0); + ipath_disarm_senderrbufs(dd); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * processes (causing armlaunch), send errors due to going into freeze mode, * etc., and try to avoid causing extra interrupts while doing so. * Forcibly update the in-memory pioavail register copies after cleanup - * because the chip won't do it for anything changing while in freeze mode - * (we don't want to wait for the next pio buffer state change). + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). * Make sure that we don't lose any important interrupts by using the chip * feature that says that writing 0 to a bit in *clear that is set in * *status will cause an interrupt to be generated again (if allowed by @@ -918,44 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) */ void ipath_clear_freeze(struct ipath_devdata *dd) { - int i, im; - u64 val; - /* disable error interrupts, to avoid confusion */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); /* also disable interrupts; errormask is sometimes overwriten */ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); - /* - * clear all sends, because they have may been - * completed by usercode while in freeze mode, and - * therefore would not be sent, and eventually - * might cause the process to run out of bufs - */ - ipath_cancel_sends(dd, 0); + ipath_cancel_sends(dd, 1); + + /* clear the freeze, and be sure chip saw it */ ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - /* ensure pio avail updates continue */ + /* force in-memory update now we are out of freeze */ ipath_force_pio_avail_update(dd); /* - * We just enabled pioavailupdate, so dma copy is almost certainly - * not yet right, so read the registers directly. Similar to init - */ - for (i = 0; i < dd->ipath_pioavregs; i++) { - /* deal with 6110 chip bug */ - im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? - i ^ 1 : i; - val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im); - dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val); - dd->ipath_pioavailshadow[i] = val | - (~dd->ipath_pioavailkernel[i] << - INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT); - } - - /* * force new interrupt if any hwerr, error or interrupt bits are * still set, and clear "safe" send packet errors related to freeze * and cancelling sends. Re-enable error interrupts before possible @@ -1312,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data) ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) - handle_layer_pioavail(dd); - else - ipath_dbg("unexpected BUFAVAIL intr\n"); + /* always process; sdma verbs uses PIO for acks and VL15 */ + handle_layer_pioavail(dd); } ret = IRQ_HANDLED; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 202337ae90d..0bd8bcb184a 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -117,6 +117,10 @@ struct ipath_portdata { u16 port_subport_cnt; /* non-zero if port is being shared. */ u16 port_subport_id; + /* number of pio bufs for this port (all procs, if shared) */ + u32 port_piocnt; + /* first pio buffer for this port */ + u32 port_pio_base; /* chip offset of PIO buffers for this port */ u32 port_piobufs; /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ @@ -155,8 +159,8 @@ struct ipath_portdata { /* saved total number of polled urgent packets for poll edge trigger */ u32 port_urgent_poll; /* pid of process using this port */ - pid_t port_pid; - pid_t port_subpid[INFINIPATH_MAX_SUBPORT]; + struct pid *port_pid; + struct pid *port_subpid[INFINIPATH_MAX_SUBPORT]; /* same size as task_struct .comm[] */ char port_comm[16]; /* pkeys set by this use of this port */ @@ -228,6 +232,11 @@ struct ipath_sdma_desc { #define IPATH_SDMA_TXREQ_S_ABORTED 2 #define IPATH_SDMA_TXREQ_S_SHUTDOWN 3 +#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63) +#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62) +#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61) +#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30) + /* max dwords in small buffer packet */ #define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2) @@ -384,6 +393,8 @@ struct ipath_devdata { u32 ipath_lastrpkts; /* pio bufs allocated per port */ u32 ipath_pbufsport; + /* if remainder on bufs/port, ports < extrabuf get 1 extra */ + u32 ipath_ports_extrabuf; u32 ipath_pioupd_thresh; /* update threshold, some chips */ /* * number of ports configured as max; zero is set to number chip @@ -477,7 +488,7 @@ struct ipath_devdata { /* SendDMA related entries */ spinlock_t ipath_sdma_lock; - u64 ipath_sdma_status; + unsigned long ipath_sdma_status; unsigned long ipath_sdma_abort_jiffies; unsigned long ipath_sdma_abort_intr_timeout; unsigned long ipath_sdma_buf_jiffies; @@ -816,8 +827,8 @@ struct ipath_devdata { #define IPATH_SDMA_DISARMED 1 #define IPATH_SDMA_DISABLED 2 #define IPATH_SDMA_LAYERBUF 3 -#define IPATH_SDMA_RUNNING 62 -#define IPATH_SDMA_SHUTDOWN 63 +#define IPATH_SDMA_RUNNING 30 +#define IPATH_SDMA_SHUTDOWN 31 /* bit combinations that correspond to abort states */ #define IPATH_SDMA_ABORT_NONE 0 @@ -1011,7 +1022,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *); int ipath_update_eeprom_log(struct ipath_devdata *dd); void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); -void ipath_disarm_senderrbufs(struct ipath_devdata *, int); +void ipath_disarm_senderrbufs(struct ipath_devdata *); void ipath_force_pio_avail_update(struct ipath_devdata *); void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 1ff46ae7dd9..be4fc9ada8e 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -111,9 +111,9 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, nip->revision = cpu_to_be32((majrev << 16) | minrev); nip->local_port_num = port; vendor = dd->ipath_vendorid; - nip->vendor_id[0] = 0; - nip->vendor_id[1] = vendor >> 8; - nip->vendor_id[2] = vendor; + nip->vendor_id[0] = IPATH_SRC_OUI_1; + nip->vendor_id[1] = IPATH_SRC_OUI_2; + nip->vendor_id[2] = IPATH_SRC_OUI_3; return reply(smp); } @@ -1492,6 +1492,10 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, goto bail; } + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_REPORT: + case IB_MGMT_METHOD_REPORT_RESP: + case IB_MGMT_METHOD_TRAP_REPRESS: case IB_MGMT_METHOD_GET_RESP: /* * The ib_mad module will call us to process responses diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index dd5b6e9d57c..4715911101e 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) { struct ipath_qp *q, **qpp; unsigned long flags; - int fnd = 0; spin_lock_irqsave(&qpt->lock, flags); @@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) *qpp = qp->next; qp->next = NULL; atomic_dec(&qp->refcount); - fnd = 1; break; } } spin_unlock_irqrestore(&qpt->lock, flags); - - if (!fnd) - return; - - free_qpn(qpt, qp->ibqp.qp_num); - - wait_event(qp->wait, !atomic_read(&qp->refcount)); } /** - * ipath_free_all_qps - remove all QPs from the table + * ipath_free_all_qps - check for QPs still in use * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. */ -void ipath_free_all_qps(struct ipath_qp_table *qpt) +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) { unsigned long flags; - struct ipath_qp *qp, *nqp; - u32 n; + struct ipath_qp *qp; + u32 n, qp_inuse = 0; + spin_lock_irqsave(&qpt->lock, flags); for (n = 0; n < qpt->max; n++) { - spin_lock_irqsave(&qpt->lock, flags); qp = qpt->table[n]; qpt->table[n] = NULL; - spin_unlock_irqrestore(&qpt->lock, flags); - - while (qp) { - nqp = qp->next; - free_qpn(qpt, qp->ibqp.qp_num); - if (!atomic_dec_and_test(&qp->refcount) || - !ipath_destroy_qp(&qp->ibqp)) - ipath_dbg("QP memory leak!\n"); - qp = nqp; - } + + for (; qp; qp = qp->next) + qp_inuse++; } + spin_unlock_irqrestore(&qpt->lock, flags); - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { + for (n = 0; n < ARRAY_SIZE(qpt->map); n++) if (qpt->map[n].page) - free_page((unsigned long)qpt->map[n].page); - } + free_page((unsigned long) qpt->map[n].page); + return qp_inuse; } /** @@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - qp->s_busy = 0; + atomic_set(&qp->s_dma_busy, 0); qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; qp->s_pkt_delay = 0; + qp->s_draining = 0; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; - qp->r_wrid_valid = 0; + qp->r_aflags = 0; + qp->r_flags = 0; qp->s_rnr_timeout = 0; qp->s_head = 0; qp->s_tail = 0; @@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; - qp->s_wait_credit = 0; memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; @@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type) qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; } - qp->r_reuse_sge = 0; } /** - * ipath_error_qp - put a QP into an error state - * @qp: the QP to put into an error state + * ipath_error_qp - put a QP into the error state + * @qp: the QP to put into the error state * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. * Returns true if last WQE event should be generated. * The QP s_lock should be held and interrupts disabled. + * If we are already in error state, just return. */ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) @@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ib_wc wc; int ret = 0; - ipath_dbg("QP%d/%d in error state (%d)\n", - qp->ibqp.qp_num, qp->remote_qpn, err); + if (qp->state == IB_QPS_ERR) + goto bail; + + qp->state = IB_QPS_ERR; spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) @@ -399,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + ipath_schedule_send(qp); + + memset(&wc, 0, sizeof(wc)); wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - if (qp->r_wrid_valid) { - qp->r_wrid_valid = 0; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { wc.wr_id = qp->r_wr_id; - wc.opcode = IB_WC_RECV; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); } wc.status = IB_WC_WR_FLUSH_ERR; - while (qp->s_last != qp->s_head) { - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->s_hdrwords = 0; - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - if (qp->r_rq.wq) { struct ipath_rwq *wq; u32 head; @@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; - wc.opcode = IB_WC_RECV; while (tail != head) { wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; if (++tail >= qp->r_rq.size) @@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) } else if (qp->ibqp.event_handler) ret = 1; +bail: return ret; } @@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct ipath_ibdev *dev = to_idev(ibqp->device); struct ipath_qp *qp = to_iqp(ibqp); enum ib_qp_state cur_state, new_state; - unsigned long flags; int lastwqe = 0; int ret; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irq(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; @@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + spin_lock_irq(&qp->s_lock); + } ipath_reset_qp(qp, ibqp->qp_type); break; + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + case IB_QPS_ERR: lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: + qp->state = new_state; break; - } if (attr_mask & IB_QP_PKEY_INDEX) @@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; - qp->state = new_state; - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); if (lastwqe) { struct ib_event ev; @@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto bail; inval: - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irq(&qp->s_lock); ret = -EINVAL; bail: @@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->pkey_index = qp->s_pkey_index; attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; - attr->sq_draining = 0; + attr->sq_draining = qp->s_draining; attr->max_rd_atomic = qp->s_max_rd_atomic; attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->min_rnr_timer = qp->r_min_rnr_timer; @@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->timerwait); @@ -926,6 +924,7 @@ bail_ip: else vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); + free_qpn(&dev->qp_table, qp->ibqp.qp_num); bail_qp: kfree(qp); bail_swq: @@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp) { struct ipath_qp *qp = to_iqp(ibqp); struct ipath_ibdev *dev = to_idev(ibqp->device); - unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); - qp->state = IB_QPS_ERR; - spin_unlock_irqrestore(&qp->s_lock, flags); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->timerwait)) + list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~IPATH_S_ANY_WAIT; + spin_unlock_irq(&qp->s_lock); + /* Stop the sending tasklet */ + tasklet_kill(&qp->s_task); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + } else + spin_unlock_irq(&qp->s_lock); - /* Stop the sending tasklet. */ - tasklet_kill(&qp->s_task); + ipath_free_qp(&dev->qp_table, qp); if (qp->s_tx) { atomic_dec(&qp->refcount); if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) kfree(qp->s_tx->txreq.map_addr); + spin_lock_irq(&dev->pending_lock); + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); + spin_unlock_irq(&dev->pending_lock); + qp->s_tx = NULL; } - /* Make sure the QP isn't on the timeout list. */ - spin_lock_irqsave(&dev->pending_lock, flags); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - if (qp->s_tx) - list_add(&qp->s_tx->txreq.list, &dev->txreq_free); - spin_unlock_irqrestore(&dev->pending_lock, flags); + wait_event(qp->wait, !atomic_read(&qp->refcount)); - /* - * Make sure that the QP is not in the QPN table so receive - * interrupts will discard packets for this QP. XXX Also remove QP - * from multicast table. - */ - if (atomic_read(&qp->refcount) != 0) - ipath_free_qp(&dev->qp_table, qp); + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qp_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); if (qp->ip) kref_put(&qp->ip->ref, ipath_release_mmap_info); @@ -1026,48 +1028,6 @@ bail: } /** - * ipath_sqerror_qp - put a QP's send queue into an error state - * @qp: QP who's send queue will be put into an error state - * @wc: the WC responsible for putting the QP in this state - * - * Flushes the send work queue. - * The QP s_lock should be held and interrupts disabled. - */ - -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) -{ - struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - - ipath_dbg("Send queue error on QP%d/%d: err: %d\n", - qp->ibqp.qp_num, qp->remote_qpn, wc->status); - - spin_lock(&dev->pending_lock); - if (!list_empty(&qp->timerwait)) - list_del_init(&qp->timerwait); - if (!list_empty(&qp->piowait)) - list_del_init(&qp->piowait); - spin_unlock(&dev->pending_lock); - - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - - wc->status = IB_WC_WR_FLUSH_ERR; - - while (qp->s_last != qp->s_head) { - wqe = get_swqe_ptr(qp, qp->s_last); - wc->wr_id = wqe->wr.wr_id; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - } - qp->s_cur = qp->s_tail = qp->s_head; - qp->state = IB_QPS_SQE; -} - -/** * ipath_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush * @aeth: the Acknowledge Extended Transport Header @@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth) } /* Restart sending if it was blocked due to lack of credits. */ - if (qp->s_cur != qp->s_head && + if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && + qp->s_cur != qp->s_head && (qp->s_lsn == (u32) -1 || ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, qp->s_lsn + 1) <= 0)) - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index c405dfba553..97710522624 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, u32 bth0; u32 bth2; + /* Don't send an ACK if we aren't supposed to. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto bail; + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout || qp->s_wait_credit) - goto bail; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } - /* Limit the number of packets sent without an ACK. */ - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { - qp->s_wait_credit = 1; - dev->n_rc_stalls++; + /* Leave BUSY set until RNR timeout. */ + if (qp->s_rnr_timeout) { + qp->s_flags |= IPATH_S_WAITING; goto bail; } @@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) wqe = get_swqe_ptr(qp, qp->s_cur); switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* * Resend an old request or start a new one. * @@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp) case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; goto bail; + } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = @@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: ret = 1; + goto unlock; + bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp) spin_unlock_irqrestore(&qp->s_lock, flags); + /* Don't try to send ACKs if the link isn't ACTIVE */ dd = dev->dd; + if (!(dd->ipath_flags & IPATH_LINKACTIVE)) + goto done; + piobuf = ipath_getpiobuf(dd, 0, NULL); if (!piobuf) { /* @@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp) goto done; queue_ack: - dev->n_rc_qacks++; - qp->s_flags |= IPATH_S_ACK_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { + dev->n_rc_qacks++; + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); + } spin_unlock_irqrestore(&qp->s_lock, flags); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); - done: return; } @@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn) /* * Set the state to restart in the middle of a request. * Don't change the s_sge, s_cur_sge, or s_cur_size. - * See ipath_do_rc_send(). + * See ipath_make_rc_req(). */ switch (opcode) { case IB_WR_SEND: @@ -771,27 +802,14 @@ done: * * The QP s_lock should be held and interrupts disabled. */ -void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) +void ipath_restart_rc(struct ipath_qp *qp, u32 psn) { struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); struct ipath_ibdev *dev; if (qp->s_retry == 0) { - wc->wr_id = wqe->wr.wr_id; - wc->status = IB_WC_RETRY_EXC_ERR; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc->vendor_err = 0; - wc->byte_len = 0; - wc->qp = &qp->ibqp; - wc->imm_data = 0; - wc->src_qp = qp->remote_qpn; - wc->wc_flags = 0; - wc->pkey_index = 0; - wc->slid = qp->remote_ah_attr.dlid; - wc->sl = qp->remote_ah_attr.sl; - wc->dlid_path_bits = 0; - wc->port_num = 0; - ipath_sqerror_qp(qp, wc); + ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); goto bail; } qp->s_retry--; @@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) spin_lock(&dev->pending_lock); if (!list_empty(&qp->timerwait)) list_del_init(&qp->timerwait); + if (!list_empty(&qp->piowait)) + list_del_init(&qp->piowait); spin_unlock(&dev->pending_lock); if (wqe->wr.opcode == IB_WR_RDMA_READ) @@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); bail: return; @@ -820,13 +840,7 @@ bail: static inline void update_last_psn(struct ipath_qp *qp, u32 psn) { - if (qp->s_last_psn != psn) { - qp->s_last_psn = psn; - if (qp->s_wait_credit) { - qp->s_wait_credit = 0; - tasklet_hi_schedule(&qp->s_task); - } - } + qp->s_last_psn = psn; } /** @@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; + enum ib_wc_status status; struct ipath_swqe *wqe; int ret = 0; u32 ack_psn; @@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ update_last_psn(qp, wqe->psn - 1); /* Retry this request. */ - ipath_restart_rc(qp, wqe->psn, &wc); + ipath_restart_rc(qp, wqe->psn); /* * No need to process the ACK/NAK since we are * restarting an earlier request. @@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { qp->s_num_rd_atomic--; /* Restart sending task if fence is complete */ - if ((qp->s_flags & IPATH_S_FENCE_PENDING) && - !qp->s_num_rd_atomic) { - qp->s_flags &= ~IPATH_S_FENCE_PENDING; - tasklet_hi_schedule(&qp->s_task); - } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { - qp->s_flags &= ~IPATH_S_RDMAR_PENDING; - tasklet_hi_schedule(&qp->s_task); - } + if (((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) || + qp->s_flags & IPATH_S_RDMAR_PENDING) + ipath_schedule_send(qp); } /* Post a send completion queue entry if requested. */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; wc.byte_len = wqe->length; - wc.imm_data = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); } qp->s_retry = qp->s_retry_cnt; @@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, } else { if (++qp->s_last >= qp->s_size) qp->s_last = 0; + if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) + qp->s_draining = 0; if (qp->s_last == qp->s_tail) break; wqe = get_swqe_ptr(qp, qp->s_last); @@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, */ if (ipath_cmp24(qp->s_psn, psn) <= 0) { reset_psn(qp, psn + 1); - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); } } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { qp->s_state = OP(SEND_LAST); @@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, if (qp->s_last == qp->s_tail) goto bail; if (qp->s_rnr_retry == 0) { - wc.status = IB_WC_RNR_RETRY_EXC_ERR; + status = IB_WC_RNR_RETRY_EXC_ERR; goto class_b; } if (qp->s_rnr_retry_cnt < 7) @@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK]; ipath_insert_rnr_queue(qp); + ipath_schedule_send(qp); goto bail; case 3: /* NAK */ @@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, * RDMA READ response which terminates the RDMA * READ. */ - ipath_restart_rc(qp, psn, &wc); + ipath_restart_rc(qp, psn); break; case 1: /* Invalid Request */ - wc.status = IB_WC_REM_INV_REQ_ERR; + status = IB_WC_REM_INV_REQ_ERR; dev->n_other_naks++; goto class_b; case 2: /* Remote Access Error */ - wc.status = IB_WC_REM_ACCESS_ERR; + status = IB_WC_REM_ACCESS_ERR; dev->n_other_naks++; goto class_b; case 3: /* Remote Operation Error */ - wc.status = IB_WC_REM_OP_ERR; + status = IB_WC_REM_OP_ERR; dev->n_other_naks++; class_b: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.qp = &qp->ibqp; - wc.imm_data = 0; - wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_sqerror_qp(qp, &wc); + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: @@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, int header_in_data) { struct ipath_swqe *wqe; + enum ib_wc_status status; unsigned long flags; - struct ib_wc wc; int diff; u32 pad; u32 aeth; @@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto ack_done; + /* Ignore invalid responses. */ if (ipath_cmp24(psn, qp->s_next_psn) >= 0) goto ack_done; @@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, if (unlikely(qp->s_last == qp->s_tail)) goto ack_done; wqe = get_swqe_ptr(qp, qp->s_last); + status = IB_WC_SUCCESS; switch (opcode) { case OP(ACKNOWLEDGE): @@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, wqe = get_swqe_ptr(qp, qp->s_last); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; + qp->r_flags &= ~IPATH_R_RDMAR_SEQ; /* * If this is a response to a resent RDMA read, we * have to be careful to copy the data to the right @@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; + ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) @@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* ACKs READ req. */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (qp->r_flags & IPATH_R_RDMAR_SEQ) + goto ack_done; + qp->r_flags |= IPATH_R_RDMAR_SEQ; + ipath_restart_rc(qp, qp->s_last_psn + 1); goto ack_done; } if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) @@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto ack_done; } -ack_done: - spin_unlock_irqrestore(&qp->s_lock, flags); - goto bail; - ack_op_err: - wc.status = IB_WC_LOC_QP_OP_ERR; + status = IB_WC_LOC_QP_OP_ERR; goto ack_err; ack_len_err: - wc.status = IB_WC_LOC_LEN_ERR; + status = IB_WC_LOC_LEN_ERR; ack_err: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; - wc.qp = &qp->ibqp; - wc.src_qp = qp->remote_qpn; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_sqerror_qp(qp, &wc); + ipath_send_complete(qp, wqe, status); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); +ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); bail: return; @@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, psn &= IPATH_PSN_MASK; e = NULL; old_req = 1; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this now that we hold the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock_done; + for (i = qp->r_head_ack_queue; ; i = prev) { if (i == qp->s_tail_ack_queue) old_req = 0; @@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, break; } qp->r_nak_state = 0; - tasklet_hi_schedule(&qp->s_task); + ipath_schedule_send(qp); unlock_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1523,13 +1522,12 @@ send_ack: return 0; } -static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) { unsigned long flags; int lastwqe; spin_lock_irqsave(&qp->s_lock, flags); - qp->state = IB_QPS_ERR; lastwqe = ipath_error_qp(qp, err); spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) { - unsigned long flags; unsigned next; next = n + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - spin_lock_irqsave(&qp->s_lock, flags); if (n == qp->s_tail_ack_queue) { qp->s_tail_ack_queue = next; qp->s_ack_state = OP(ACKNOWLEDGE); } - spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int diff; struct ib_reth *reth; int header_in_data; + unsigned long flags; /* Validate the SLID. See Ch. 9.6.1.5 */ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) @@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, opcode == OP(SEND_LAST) || opcode == OP(SEND_LAST_WITH_IMMEDIATE)) break; - nack_inv: - ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); - qp->r_nak_state = IB_NAK_INVALID_REQUEST; - qp->r_ack_psn = qp->r_psn; - goto send_ack; + goto nack_inv; case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_MIDDLE): @@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, break; } - wc.imm_data = 0; - wc.wc_flags = 0; + memset(&wc, 0, sizeof wc); /* OK, process the packet. */ switch (opcode) { case OP(SEND_FIRST): - if (!ipath_get_rwqe(qp, 0)) { - rnr_nak: - qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; - qp->r_ack_psn = qp->r_psn; - goto send_ack; - } + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; qp->r_rcv_len = 0; /* FALLTHROUGH */ case OP(SEND_MIDDLE): @@ -1716,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -1741,20 +1728,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto nack_inv; ipath_copy_sge(&qp->r_sge, data, tlen); qp->r_msn++; - if (!qp->r_wrid_valid) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) break; - qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; + if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -1815,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } e = &qp->s_ack_queue[qp->r_head_ack_queue]; @@ -1838,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, IB_ACCESS_REMOTE_READ); if (unlikely(!ok)) - goto nack_acc; + goto nack_acc_unlck; /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. @@ -1865,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } case OP(COMPARE_SWAP): @@ -1890,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + /* Double check we can process this while holding the s_lock. */ + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) + goto unlock; if (unlikely(next == qp->s_tail_ack_queue)) { if (!qp->s_ack_queue[next].sent) - goto nack_inv; + goto nack_inv_unlck; ipath_update_ack_queue(qp, next); } if (!header_in_data) @@ -1902,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) - goto nack_inv; + goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); /* Check rkey & NAK */ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; + goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); @@ -1925,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; - barrier(); qp->r_head_ack_queue = next; - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + /* Schedule the send tasklet. */ + ipath_schedule_send(qp); - goto done; + goto unlock; } default: @@ -1947,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto send_ack; goto done; +rnr_nak: + qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); +nack_inv: + ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_INVALID_REQUEST; + qp->r_ack_psn = qp->r_psn; + goto send_ack; + +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); nack_acc: - ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); + ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; - send_ack: send_rc_ack(qp); + goto done; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); done: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index 8ac5c1d82cc..af051f75766 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = { * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device * @qp: the QP * + * Called with the QP s_lock held and interrupts disabled. * XXX Use a simple list for now. We might need a priority * queue if we have lots of QPs waiting for RNR timeouts * but that should be rare. @@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = { void ipath_insert_rnr_queue(struct ipath_qp *qp) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); + /* We already did a spin_lock_irqsave(), so just use spin_lock */ + spin_lock(&dev->pending_lock); if (list_empty(&dev->rnrwait)) list_add(&qp->timerwait, &dev->rnrwait); else { @@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) nqp->s_rnr_timeout -= qp->s_rnr_timeout; list_add(&qp->timerwait, l); } - spin_unlock_irqrestore(&dev->pending_lock, flags); + spin_unlock(&dev->pending_lock); } /** @@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, goto bail; bad_lkey: + memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr_id; wc.status = IB_WC_LOC_PROT_ERR; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal solicited completion event. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); ret = 0; @@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } spin_lock_irqsave(&rq->lock, flags); + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + wq = rq->wq; tail = wq->tail; /* Validate tail before using it since it is user writable. */ @@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) tail = 0; do { if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); ret = 0; - goto bail; + goto unlock; } /* Make sure entry is read after head index is read. */ smp_rmb(); @@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wq->tail = tail; ret = 1; - qp->r_wrid_valid = 1; + set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); if (handler) { u32 n; @@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) goto bail; } } +unlock: spin_unlock_irqrestore(&rq->lock, flags); - bail: return ret; } @@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) struct ib_wc wc; u64 sdata; atomic64_t *maddr; + enum ib_wc_status send_status; + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); - if (!qp) { - dev->n_pkt_drops++; - return; - } -again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || - sqp->s_rnr_timeout) { - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + goto unlock; - /* Get the next send request. */ - if (sqp->s_last == sqp->s_head) { - /* Send work queue is empty. */ - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; + sqp->s_flags |= IPATH_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work reqeust. */ + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; } /* * We can rely on the entry not changing without the s_lock * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. */ - wqe = get_swqe_ptr(sqp, sqp->s_last); + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } spin_unlock_irqrestore(&sqp->s_lock, flags); - wc.wc_flags = 0; - wc.imm_data = 0; + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { + dev->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. + */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + + memset(&wc, 0, sizeof wc); + send_status = IB_WC_SUCCESS; sqp->s_sge.sge = wqe->sg_list[0]; sqp->s_sge.sg_list = wqe->sg_list + 1; @@ -310,78 +331,36 @@ again: switch (wqe->wr.opcode) { case IB_WR_SEND_WITH_IMM: wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.ex.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: - if (!ipath_get_rwqe(qp, 0)) { - rnr_nak: - /* Handle RNR NAK */ - if (qp->ibqp.qp_type == IB_QPT_UC) - goto send_comp; - if (sqp->s_rnr_retry == 0) { - wc.status = IB_WC_RNR_RETRY_EXC_ERR; - goto err; - } - if (sqp->s_rnr_retry_cnt < 7) - sqp->s_rnr_retry--; - dev->n_rnr_naks++; - sqp->s_rnr_timeout = - ib_ipath_rnr_table[qp->r_min_rnr_timer]; - ipath_insert_rnr_queue(sqp); - goto done; - } + if (!ipath_get_rwqe(qp, 0)) + goto rnr_nak; break; case IB_WR_RDMA_WRITE_WITH_IMM: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = wqe->wr.ex.imm_data; + wc.ex.imm_data = wqe->wr.ex.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, - IB_ACCESS_REMOTE_WRITE))) { - acc_err: - wc.status = IB_WC_REM_ACCESS_ERR; - err: - wc.wr_id = wqe->wr.wr_id; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.qp = &sqp->ibqp; - wc.src_qp = sqp->remote_qpn; - wc.pkey_index = 0; - wc.slid = sqp->remote_ah_attr.dlid; - wc.sl = sqp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; - spin_lock_irqsave(&sqp->s_lock, flags); - ipath_sqerror_qp(sqp, &wc); - spin_unlock_irqrestore(&sqp->s_lock, flags); - goto done; - } + IB_ACCESS_REMOTE_WRITE))) + goto acc_err; break; case IB_WR_RDMA_READ: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, @@ -394,11 +373,8 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) { - wc.status = IB_WC_REM_INV_REQ_ERR; - goto err; - } + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.rkey, @@ -415,7 +391,8 @@ again: goto send_comp; default: - goto done; + send_status = IB_WC_LOC_QP_OP_ERR; + goto serr; } sge = &sqp->s_sge.sge; @@ -448,8 +425,7 @@ again: sqp->s_len -= len; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE || - wqe->wr.opcode == IB_WR_RDMA_READ) + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) goto send_comp; if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) @@ -458,32 +434,89 @@ again: wc.opcode = IB_WC_RECV; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.vendor_err = 0; wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); +flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - ipath_send_complete(sqp, wqe, IB_WC_SUCCESS); + ipath_send_complete(sqp, wqe, send_status); goto again; +rnr_nak: + /* Handle RNR NAK */ + if (qp->ibqp.qp_type == IB_QPT_UC) + goto send_comp; + /* + * Note: we don't need the s_lock held since the BUSY flag + * makes this single threaded. + */ + if (sqp->s_rnr_retry == 0) { + send_status = IB_WC_RNR_RETRY_EXC_ERR; + goto serr; + } + if (sqp->s_rnr_retry_cnt < 7) + sqp->s_rnr_retry--; + spin_lock_irqsave(&sqp->s_lock, flags); + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) + goto clr_busy; + sqp->s_flags |= IPATH_S_WAITING; + dev->n_rnr_naks++; + sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; + ipath_insert_rnr_queue(sqp); + goto clr_busy; + +inv_err: + send_status = IB_WC_REM_INV_REQ_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +acc_err: + send_status = IB_WC_REM_ACCESS_ERR; + wc.status = IB_WC_LOC_PROT_ERR; +err: + /* responder goes to error state */ + ipath_rc_error(qp, wc.status); + +serr: + spin_lock_irqsave(&sqp->s_lock, flags); + ipath_send_complete(sqp, wqe, send_status); + if (sqp->ibqp.qp_type == IB_QPT_RC) { + int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + + sqp->s_flags &= ~IPATH_S_BUSY; + spin_unlock_irqrestore(&sqp->s_lock, flags); + if (lastwqe) { + struct ib_event ev; + + ev.device = sqp->ibqp.device; + ev.element.qp = &sqp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); + } + goto done; + } +clr_busy: + sqp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&sqp->s_lock, flags); done: - if (atomic_dec_and_test(&qp->refcount)) + if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } -static void want_buffer(struct ipath_devdata *dd) +static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) { - if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) { + if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || + qp->ibqp.qp_type == IB_QPT_SMI) { unsigned long flags; spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); @@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd) * @dev: the device we ran out of buffers on * * Called when we run out of PIO buffers. + * If we are now in the error state, return zero to flush the + * send work request. */ -static void ipath_no_bufs_available(struct ipath_qp *qp, +static int ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) { unsigned long flags; + int ret = 1; /* * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() - * could be called. If we are still in the tasklet function, - * tasklet_hi_schedule() will not call us until the next time - * tasklet_hi_schedule() is called. - * We leave the busy flag set so that another post send doesn't - * try to put the same QP on the piowait list again. + * could be called. Therefore, put QP on the piowait list before + * enabling the PIO avail interrupt. */ - spin_lock_irqsave(&dev->pending_lock, flags); - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock_irqrestore(&dev->pending_lock, flags); - want_buffer(dev->dd); - dev->n_piowait++; + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { + dev->n_piowait++; + qp->s_flags |= IPATH_S_WAITING; + qp->s_flags &= ~IPATH_S_BUSY; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->piowait)) + list_add_tail(&qp->piowait, &dev->piowait); + spin_unlock(&dev->pending_lock); + } else + ret = 0; + spin_unlock_irqrestore(&qp->s_lock, flags); + if (ret) + want_buffer(dev->dd, qp); + return ret; } /** @@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data) struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); int (*make_req)(struct ipath_qp *qp); - - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) - goto bail; + unsigned long flags; if ((qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) && qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { ipath_ruc_loopback(qp); - goto clear; + goto bail; } if (qp->ibqp.qp_type == IB_QPT_RC) @@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data) else make_req = ipath_make_ud_req; + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || + !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + } + + qp->s_flags |= IPATH_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + again: /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { @@ -623,8 +677,8 @@ again: */ if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) { - ipath_no_bufs_available(qp, dev); - goto bail; + if (ipath_no_bufs_available(qp, dev)) + goto bail; } dev->n_unicast_xmit++; /* Record that we sent the packet and s_hdr is empty. */ @@ -633,16 +687,20 @@ again: if (make_req(qp)) goto again; -clear: - clear_bit(IPATH_S_BUSY, &qp->s_busy); + bail:; } +/* + * This should be called with s_lock held. + */ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, enum ib_wc_status status) { - unsigned long flags; - u32 last; + u32 old_last, last; + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) + return; /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || @@ -650,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, status != IB_WC_SUCCESS) { struct ib_wc wc; + memset(&wc, 0, sizeof wc); wc.wr_id = wqe->wr.wr_id; wc.status = status; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = wqe->length; - wc.imm_data = 0; wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + if (status == IB_WC_SUCCESS) + wc.byte_len = wqe->length; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); } - spin_lock_irqsave(&qp->s_lock, flags); - last = qp->s_last; + old_last = last = qp->s_last; if (++last >= qp->s_size) last = 0; qp->s_last = last; - spin_unlock_irqrestore(&qp->s_lock, flags); + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index 1974df7a9f7..284c9bca517 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -263,14 +263,10 @@ static void sdma_abort_task(unsigned long opaque) hwstatus = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus); - if (/* ScoreBoardDrainInProg */ - test_bit(63, &hwstatus) || - /* AbortInProg */ - test_bit(62, &hwstatus) || - /* InternalSDmaEnable */ - test_bit(61, &hwstatus) || - /* ScbEmpty */ - !test_bit(30, &hwstatus)) { + if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG | + IPATH_SDMA_STATUS_ABORT_IN_PROG | + IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) || + !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) { if (dd->ipath_sdma_reset_wait > 0) { /* not done shutting down sdma */ --dd->ipath_sdma_reset_wait; @@ -308,13 +304,15 @@ static void sdma_abort_task(unsigned long opaque) spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); /* - * Don't restart sdma here. Wait until link is up to ACTIVE. - * VL15 MADs used to bring the link up use PIO, and multiple - * link transitions otherwise cause the sdma engine to be + * Don't restart sdma here (with the exception + * below). Wait until link is up to ACTIVE. VL15 MADs + * used to bring the link up use PIO, and multiple link + * transitions otherwise cause the sdma engine to be * stopped and started multiple times. - * The disable is done here, including the shadow, so the - * state is kept consistent. - * See ipath_restart_sdma() for the actual starting of sdma. + * The disable is done here, including the shadow, + * so the state is kept consistent. + * See ipath_restart_sdma() for the actual starting + * of sdma. */ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; @@ -326,6 +324,13 @@ static void sdma_abort_task(unsigned long opaque) /* make sure I see next message */ dd->ipath_sdma_abort_jiffies = 0; + /* + * Not everything that takes SDMA offline is a link + * status change. If the link was up, restart SDMA. + */ + if (dd->ipath_flags & IPATH_LINKACTIVE) + ipath_restart_sdma(dd); + goto done; } @@ -336,7 +341,7 @@ resched: * state change */ if (jiffies > dd->ipath_sdma_abort_jiffies) { - ipath_dbg("looping with status 0x%016llx\n", + ipath_dbg("looping with status 0x%08lx\n", dd->ipath_sdma_status); dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; } @@ -427,7 +432,12 @@ int setup_sdma(struct ipath_devdata *dd) goto done; } - dd->ipath_sdma_status = 0; + /* + * Set initial status as if we had been up, then gone down. + * This lets initial start on transition to ACTIVE be the + * same as restart after link flap. + */ + dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED; dd->ipath_sdma_abort_jiffies = 0; dd->ipath_sdma_generation = 0; dd->ipath_sdma_descq_tail = 0; @@ -449,16 +459,19 @@ int setup_sdma(struct ipath_devdata *dd) ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, dd->ipath_sdma_head_phys); - /* Reserve all the former "kernel" piobufs */ - n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved; - for (i = dd->ipath_lastport_piobuf; i < n; ++i) { + /* + * Reserve all the former "kernel" piobufs, using high number range + * so we get as many 4K buffers as possible + */ + n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved; + ipath_chg_pioavailkernel(dd, i, n - i , 0); + for (; i < n; ++i) { unsigned word = i / 64; unsigned bit = i & 63; BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } - ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf, - n - dd->ipath_lastport_piobuf, 0); ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, senddmabufmask[0]); ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, @@ -598,7 +611,7 @@ void ipath_restart_sdma(struct ipath_devdata *dd) } spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); if (!needed) { - ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n", + ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n", dd->ipath_sdma_status); goto bail; } @@ -615,6 +628,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd) ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); + /* notify upper layers */ + ipath_ib_piobufavail(dd->verbs_dev); + bail: return; } @@ -682,7 +698,7 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { + if (dma_mapping_error(&dd->pcidev->dev, addr)) { ret = -EIO; goto unlock; } diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index bfe8926b551..82cc588b8bf 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp) { struct ipath_other_headers *ohdr; struct ipath_swqe *wqe; + unsigned long flags; u32 hwords; u32 bth0; u32 len; u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); int ret = 0; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); goto done; + } ohdr = &qp->s_hdr.u.oth; if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) @@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp) qp->s_wqe = NULL; switch (qp->s_state) { default: + if (!(ib_ipath_state_ops[qp->state] & + IPATH_PROCESS_NEXT_SEND_OK)) + goto bail; /* Check if send work queue is empty. */ if (qp->s_cur == qp->s_head) - goto done; + goto bail; /* * Start a new request. */ @@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp) break; default: - goto done; + goto bail; } break; @@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp) ipath_make_ruc_header(to_idev(qp->ibqp.device), qp, ohdr, bth0 | (qp->s_state << 24), qp->s_next_psn++ & IPATH_PSN_MASK); +done: ret = 1; + goto unlock; -done: +bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, */ opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - wc.imm_data = 0; - wc.wc_flags = 0; + memset(&wc, 0, sizeof wc); /* Compare the PSN verses the expected PSN. */ if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { @@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): send_first: - if (qp->r_reuse_sge) { - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) { + qp->r_flags &= ~IPATH_R_REUSE_SGE; qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; @@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ if (unlikely(tlen != (hdrsize + pmtu + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -356,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Check for invalid length. */ /* XXX LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } @@ -380,24 +403,19 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, tlen -= (hdrsize + pad + 4); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto done; } - /* XXX Need to free SGEs */ + wc.opcode = IB_WC_RECV; last_imm: ipath_copy_sge(&qp->r_sge, data, tlen); wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; - wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; - wc.dlid_path_bits = 0; - wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -465,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): rdma_last_imm: if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else { /* Immediate data comes after BTH */ - wc.imm_data = ohdr->u.imm_data; + wc.ex.imm_data = ohdr->u.imm_data; } hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; @@ -488,13 +506,14 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, dev->n_pkt_drops++; goto done; } - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 1)) { dev->n_pkt_drops++; goto done; } wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; goto last_imm; case OP(RDMA_WRITE_LAST): diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 8b6a261c89e..36aa242c487 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) u32 length; qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); - if (!qp) { + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { dev->n_pkt_drops++; - goto send_comp; + goto done; } rsge.sg_list = NULL; @@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) * present on the wire. */ length = swqe->length; + memset(&wc, 0, sizeof wc); wc.byte_len = length + sizeof(struct ib_grh); if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { wc.wc_flags = IB_WC_WITH_IMM; - wc.imm_data = swqe->wr.ex.imm_data; - } else { - wc.wc_flags = 0; - wc.imm_data = 0; + wc.ex.imm_data = swqe->wr.ex.imm_data; } /* @@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; /* XXX do we know which pkey matched? Only needed for GSI. */ @@ -248,8 +245,7 @@ drop: kfree(rsge.sg_list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); -send_comp: - ipath_send_complete(sqp, swqe, IB_WC_SUCCESS); +done:; } /** @@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) struct ipath_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct ipath_swqe *wqe; + unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lid; int ret = 0; - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) - goto bail; + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } if (qp->s_cur == qp->s_head) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; /* Construct the header. */ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp) dev->n_unicast_xmit++; } else { dev->n_unicast_xmit++; - lid = ah_attr->dlid & - ~((1 << dev->dd->ipath_lmc) - 1); + lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); if (unlikely(lid == dev->dd->ipath_lid)) { + /* + * If DMAs are in progress, we can't generate + * a completion for the loopback packet since + * it would be out of order. + * XXX Instead of waiting, we could queue a + * zero length descriptor so we get a callback. + */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= IPATH_S_WAIT_DMA; + goto bail; + } + spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); + spin_lock_irqsave(&qp->s_lock, flags); + ipath_send_complete(qp, wqe, IB_WC_SUCCESS); goto done; } } @@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp) ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; ret = 1; + goto unlock; bail: + qp->s_flags &= ~IPATH_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -463,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, if (qp->ibqp.qp_num > 1 && opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { if (header_in_data) { - wc.imm_data = *(__be32 *) data; + wc.ex.imm_data = *(__be32 *) data; data += sizeof(__be32); } else - wc.imm_data = ohdr->u.ud.imm_data; + wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; hdrsize += sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { - wc.imm_data = 0; + wc.ex.imm_data = 0; wc.wc_flags = 0; } else { dev->n_pkt_drops++; @@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* * Get the next work request entry to find where to put the data. */ - if (qp->r_reuse_sge) - qp->r_reuse_sge = 0; + if (qp->r_flags & IPATH_R_REUSE_SGE) + qp->r_flags &= ~IPATH_R_REUSE_SGE; else if (!ipath_get_rwqe(qp, 0)) { /* * Count VL15 packets dropped due to no receive buffer. @@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } /* Silently drop packets which are too big. */ if (wc.byte_len > qp->r_len) { - qp->r_reuse_sge = 1; + qp->r_flags |= IPATH_R_REUSE_SGE; dev->n_pkt_drops++; goto bail; } @@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); - qp->r_wrid_valid = 0; + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) + goto bail; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 86e016916cd..82d9a0b5ca2 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -206,7 +206,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_unmap; } @@ -301,7 +301,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, pages[j], 0, flen, DMA_TO_DEVICE); unsigned long fofs = addr & ~PAGE_MASK; - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto done; } @@ -508,7 +508,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, if (page) { dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_pbc; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h index e70946c1428..fc76316c4a5 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h @@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd, int ipath_user_sdma_make_progress(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); -int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, - u32 counter); void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, struct ipath_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index e63927cce5b..55c71882882 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <linux/io.h> #include <linux/utsname.h> +#include <linux/rculist.h> #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -111,16 +112,24 @@ static unsigned int ib_ipath_disable_sma; module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; ipath_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. + */ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = 0, [IB_QPS_INIT] = IPATH_POST_RECV_OK, [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | + IPATH_PROCESS_NEXT_SEND_OK, [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | - IPATH_POST_SEND_OK, - [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, - [IB_QPS_ERR] = 0, + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, + [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, + [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, }; struct ipath_ucontext { @@ -230,18 +239,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) } } -static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) -{ - struct ib_wc wc; - - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wr->wr_id; - wc.status = IB_WC_WR_FLUSH_ERR; - wc.opcode = ib_ipath_wc_opcode[wr->opcode]; - wc.qp = &qp->ibqp; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); -} - /* * Count the number of DMA descriptors needed to send length bytes of data. * Don't modify the ipath_sge_state to get the count. @@ -347,14 +344,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) spin_lock_irqsave(&qp->s_lock, flags); /* Check that state is OK to post send. */ - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { - if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) - goto bail_inval; - /* C10-96 says generate a flushed completion entry. */ - ipath_flush_wqe(qp, wr); - ret = 0; - goto bail; - } + if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) + goto bail_inval; /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) @@ -396,7 +387,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; - wqe->ssn = qp->s_ssn++; wqe->length = 0; if (wr->num_sge) { acc = wr->opcode >= IB_WR_RDMA_READ ? @@ -422,6 +412,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) goto bail_inval; } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) goto bail_inval; + wqe->ssn = qp->s_ssn++; qp->s_head = next; ret = 0; @@ -677,6 +668,7 @@ bail:; static void ipath_ib_timer(struct ipath_ibdev *dev) { struct ipath_qp *resend = NULL; + struct ipath_qp *rnr = NULL; struct list_head *last; struct ipath_qp *qp; unsigned long flags; @@ -703,7 +695,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) if (--qp->s_rnr_timeout == 0) { do { list_del_init(&qp->timerwait); - tasklet_hi_schedule(&qp->s_task); + qp->timer_next = rnr; + rnr = qp; + atomic_inc(&qp->refcount); if (list_empty(last)) break; qp = list_entry(last->next, struct ipath_qp, @@ -743,13 +737,15 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) spin_unlock_irqrestore(&dev->pending_lock, flags); /* XXX What if timer fires again while this is running? */ - for (qp = resend; qp != NULL; qp = qp->timer_next) { - struct ib_wc wc; + while (resend != NULL) { + qp = resend; + resend = qp->timer_next; spin_lock_irqsave(&qp->s_lock, flags); - if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { + if (qp->s_last != qp->s_tail && + ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { dev->n_timeouts++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1); } spin_unlock_irqrestore(&qp->s_lock, flags); @@ -757,6 +753,19 @@ static void ipath_ib_timer(struct ipath_ibdev *dev) if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } + while (rnr != NULL) { + qp = rnr; + rnr = qp->timer_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } } static void update_sge(struct ipath_sge_state *ss, u32 length) @@ -1012,13 +1021,24 @@ static void sdma_complete(void *cookie, int status) struct ipath_verbs_txreq *tx = cookie; struct ipath_qp *qp = tx->qp; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + unsigned int flags; + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - /* Generate a completion queue entry if needed */ - if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { - enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; - + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if (tx->wqe) + ipath_send_complete(qp, tx->wqe, ibs); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } else if (tx->wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, tx->wqe, ibs); + spin_unlock_irqrestore(&qp->s_lock, flags); } if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) @@ -1029,6 +1049,21 @@ static void sdma_complete(void *cookie, int status) wake_up(&qp->wait); } +static void decrement_dma_busy(struct ipath_qp *qp) +{ + unsigned int flags; + + if (atomic_dec_and_test(&qp->s_dma_busy)) { + spin_lock_irqsave(&qp->s_lock, flags); + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && + qp->s_last != qp->s_head) || + (qp->s_flags & IPATH_S_WAIT_DMA)) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + wake_up(&qp->wait_dma); + } +} + /* * Compute the number of clock cycles of delay before sending the next packet. * The multipliers reflect the number of clocks for the fastest rate so @@ -1067,9 +1102,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, if (tx) { qp->s_tx = NULL; /* resend previously constructed packet */ + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); - if (ret) + if (ret) { qp->s_tx = tx; + decrement_dma_busy(qp); + } goto bail; } @@ -1120,12 +1158,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->txreq.sg_count = ndesc; tx->map_len = (hdrwords + 2) << 2; tx->txreq.map_addr = &tx->hdr; + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); if (ret) { /* save ss and length in dwords */ tx->ss = ss; tx->len = dwords; qp->s_tx = tx; + decrement_dma_busy(qp); } goto bail; } @@ -1146,6 +1186,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, memcpy(piobuf, hdr, hdrwords << 2); ipath_copy_from_sge(piobuf + hdrwords, ss, len); + atomic_inc(&qp->s_dma_busy); ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); /* * If we couldn't queue the DMA request, save the info @@ -1156,6 +1197,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp, tx->ss = NULL; tx->len = 0; qp->s_tx = tx; + decrement_dma_busy(qp); } dev->n_unaligned++; goto bail; @@ -1179,6 +1221,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, unsigned flush_wc; u32 control; int ret; + unsigned int flags; piobuf = ipath_getpiobuf(dd, plen, NULL); if (unlikely(piobuf == NULL)) { @@ -1249,8 +1292,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, } copy_io(piobuf, ss, len, flush_wc); done: - if (qp->s_wqe) + if (qp->s_wqe) { + spin_lock_irqsave(&qp->s_lock, flags); ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); + spin_unlock_irqrestore(&qp->s_lock, flags); + } ret = 0; bail: return ret; @@ -1283,19 +1329,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, * can defer SDMA restart until link goes ACTIVE without * worrying about just how we got there. */ - if (qp->ibqp.qp_type == IB_QPT_SMI) + if (qp->ibqp.qp_type == IB_QPT_SMI || + !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, plen, dwords); - /* All non-VL15 packets are dropped if link is not ACTIVE */ - else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { - if (qp->s_wqe) - ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); - ret = 0; - } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, - plen, dwords); else - ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, plen, dwords); return ret; @@ -1403,27 +1442,46 @@ bail: * This is called from ipath_intr() at interrupt level when a PIO buffer is * available after ipath_verbs_send() returned an error that no buffers were * available. Return 1 if we consumed all the PIO buffers and we still have - * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and + * QPs waiting for buffers (for now, just restart the send tasklet and * return zero). */ int ipath_ib_piobufavail(struct ipath_ibdev *dev) { + struct list_head *list; + struct ipath_qp *qplist; struct ipath_qp *qp; unsigned long flags; if (dev == NULL) goto bail; + list = &dev->piowait; + qplist = NULL; + spin_lock_irqsave(&dev->pending_lock, flags); - while (!list_empty(&dev->piowait)) { - qp = list_entry(dev->piowait.next, struct ipath_qp, - piowait); + while (!list_empty(list)) { + qp = list_entry(list->next, struct ipath_qp, piowait); list_del_init(&qp->piowait); - clear_bit(IPATH_S_BUSY, &qp->s_busy); - tasklet_hi_schedule(&qp->s_task); + qp->pio_next = qplist; + qplist = qp; + atomic_inc(&qp->refcount); } spin_unlock_irqrestore(&dev->pending_lock, flags); + while (qplist != NULL) { + qp = qplist; + qplist = qp->pio_next; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) + ipath_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify ipath_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + bail: return 0; } @@ -1437,9 +1495,11 @@ static int ipath_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | - IB_DEVICE_SYS_IMAGE_GUID; + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; props->page_size_cap = PAGE_SIZE; - props->vendor_id = dev->dd->ipath_vendorid; + props->vendor_id = + IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; props->vendor_part_id = dev->dd->ipath_deviceid; props->hw_ver = dev->dd->ipath_pcirev; @@ -2145,11 +2205,12 @@ bail: void ipath_unregister_ib_device(struct ipath_ibdev *dev) { struct ib_device *ibdev = &dev->ibdev; - - disable_timer(dev->dd); + u32 qps_inuse; ib_unregister_device(ibdev); + disable_timer(dev->dd); + if (!list_empty(&dev->pending[0]) || !list_empty(&dev->pending[1]) || !list_empty(&dev->pending[2])) @@ -2164,7 +2225,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev) * Note that ipath_unregister_ib_device() can be called before all * the QPs are destroyed! */ - ipath_free_all_qps(&dev->qp_table); + qps_inuse = ipath_free_all_qps(&dev->qp_table); + if (qps_inuse) + ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", + qps_inuse); kfree(dev->qp_table.table); kfree(dev->lk_table.table); kfree(dev->txreq_bufs); @@ -2215,17 +2279,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr, "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" - "RC stalls %d\n" "piobuf wait %d\n" - "no piobuf %d\n" "unaligned %d\n" "PKT drops %d\n" "WQE errs %d\n", dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, - dev->n_no_piobuf, dev->n_unaligned, + dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 6514aa8306c..9d12ae8a778 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -74,6 +74,11 @@ #define IPATH_POST_RECV_OK 0x02 #define IPATH_PROCESS_RECV_OK 0x04 #define IPATH_PROCESS_SEND_OK 0x08 +#define IPATH_PROCESS_NEXT_SEND_OK 0x10 +#define IPATH_FLUSH_SEND 0x20 +#define IPATH_FLUSH_RECV 0x40 +#define IPATH_PROCESS_OR_FLUSH_SEND \ + (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 @@ -353,12 +358,14 @@ struct ipath_qp { struct ib_qp ibqp; struct ipath_qp *next; /* link list for QPN hash table */ struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ + struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ struct list_head piowait; /* link for wait PIO buf */ struct list_head timerwait; /* link for waiting for timeouts */ struct ib_ah_attr remote_ah_attr; struct ipath_ib_header s_hdr; /* next packet header to send */ atomic_t refcount; wait_queue_head_t wait; + wait_queue_head_t wait_dma; struct tasklet_struct s_task; struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; @@ -369,7 +376,7 @@ struct ipath_qp { struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state r_sge; /* current receive data */ spinlock_t s_lock; - unsigned long s_busy; + atomic_t s_dma_busy; u16 s_pkt_delay; u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u32 s_cur_size; /* size of send packet in bytes */ @@ -383,6 +390,7 @@ struct ipath_qp { u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u64 r_wr_id; /* ID for current receive WQE */ + unsigned long r_aflags; u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_psn; /* expected rcv packet sequence number */ @@ -394,8 +402,7 @@ struct ipath_qp { u8 r_state; /* opcode of last packet received */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ - u8 r_reuse_sge; /* for UC receive errors */ - u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ + u8 r_flags; u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 qp_access_flags; @@ -404,13 +411,13 @@ struct ipath_qp { u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ - u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ u8 s_flags; u8 s_dmult; + u8 s_draining; u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; @@ -428,16 +435,40 @@ struct ipath_qp { struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; -/* Bit definition for s_busy. */ -#define IPATH_S_BUSY 0 +/* + * Atomic bit definitions for r_aflags. + */ +#define IPATH_R_WRID_VALID 0 + +/* + * Bit definitions for r_flags. + */ +#define IPATH_R_REUSE_SGE 0x01 +#define IPATH_R_RDMAR_SEQ 0x02 /* * Bit definitions for s_flags. + * + * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs + * before processing the next SWQE + * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. + * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA. */ #define IPATH_S_SIGNAL_REQ_WR 0x01 #define IPATH_S_FENCE_PENDING 0x02 #define IPATH_S_RDMAR_PENDING 0x04 #define IPATH_S_ACK_PENDING 0x08 +#define IPATH_S_BUSY 0x10 +#define IPATH_S_WAITING 0x20 +#define IPATH_S_WAIT_SSN_CREDIT 0x40 +#define IPATH_S_WAIT_DMA 0x80 + +#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ + IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) #define IPATH_PSN_CREDIT 512 @@ -573,13 +604,11 @@ struct ipath_ibdev { u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; - u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; u32 n_rdma_dup_busy; u32 n_piowait; - u32 n_no_piobuf; u32 n_unaligned; u32 port_cap_flags; u32 pma_sample_start; @@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) return container_of(ibdev, struct ipath_ibdev, ibdev); } +/* + * This must be called with s_lock held. + */ +static inline void ipath_schedule_send(struct ipath_qp *qp) +{ + if (qp->s_flags & IPATH_S_ANY_WAIT) + qp->s_flags &= ~IPATH_S_ANY_WAIT; + if (!(qp->s_flags & IPATH_S_BUSY)) + tasklet_hi_schedule(&qp->s_task); +} + int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); -void ipath_free_all_qps(struct ipath_qp_table *qpt); +unsigned ipath_free_all_qps(struct ipath_qp_table *qpt); int ipath_init_qp_table(struct ipath_ibdev *idev, int size); -void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); - void ipath_get_credit(struct ipath_qp *qp, u32 aeth); unsigned ipath_ib_rate_to_mult(enum ib_rate rate); @@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, int has_grh, void *data, u32 tlen, struct ipath_qp *qp); -void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc); +void ipath_restart_rc(struct ipath_qp *qp, u32 psn); + +void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err); int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index 9e5abf9c309..d73e3223287 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -31,8 +31,7 @@ * SOFTWARE. */ -#include <linux/list.h> -#include <linux/rcupdate.h> +#include <linux/rculist.h> #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 2f199c5c4a7..a1464574bfd 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -246,7 +247,7 @@ err_mtt: if (context) ib_umem_release(cq->umem); else - mlx4_ib_free_cq_buf(dev, &cq->buf, entries); + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_db: if (!context) @@ -434,7 +435,7 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq) mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); ib_umem_release(mcq->umem); } else { - mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1); + mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); mlx4_db_free(dev->dev, &mcq->db); } @@ -637,6 +638,7 @@ repoll: case MLX4_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; case MLX4_OPCODE_SEND: + case MLX4_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; break; case MLX4_OPCODE_RDMA_READ: @@ -657,24 +659,35 @@ repoll: case MLX4_OPCODE_LSO: wc->opcode = IB_WC_LSO; break; + case MLX4_OPCODE_FMR: + wc->opcode = IB_WC_FAST_REG_MR; + break; + case MLX4_OPCODE_LOCAL_INVAL: + wc->opcode = IB_WC_LOCAL_INV; + break; } } else { wc->byte_len = be32_to_cpu(cqe->byte_cnt); switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: - wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; - wc->wc_flags = IB_WC_WITH_IMM; - wc->imm_data = cqe->immed_rss_invalid; + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; + break; + case MLX4_RECV_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid); break; case MLX4_RECV_OPCODE_SEND: wc->opcode = IB_WC_RECV; wc->wc_flags = 0; break; case MLX4_RECV_OPCODE_SEND_IMM: - wc->opcode = IB_WC_RECV; - wc->wc_flags = IB_WC_WITH_IMM; - wc->imm_data = cqe->immed_rss_invalid; + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->immed_rss_invalid; break; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 4c1e72fc8f5..cdca3a511e1 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -255,7 +255,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) { + in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4d61e32866c..a3c2851c054 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -90,7 +91,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN; + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -103,6 +105,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; if (dev->dev->caps.max_gso_sz) props->device_cap_flags |= IB_DEVICE_UD_TSO; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -126,6 +134,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; + props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64); props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; @@ -437,7 +446,9 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw); + &to_mqp(ibqp)->mqp, gid->raw, + !!(to_mqp(ibqp)->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) @@ -562,6 +573,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; + ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; ibdev->ib_dev.num_comp_vectors = 1; ibdev->ib_dev.dma_device = &dev->pdev->dev; @@ -624,6 +636,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; + ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; + ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; + ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 5cf994794d2..6e2b0dc21b6 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -83,6 +84,11 @@ struct mlx4_ib_mr { struct ib_umem *umem; }; +struct mlx4_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + dma_addr_t map; +}; + struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; @@ -101,7 +107,8 @@ struct mlx4_ib_wq { }; enum mlx4_ib_qp_flags { - MLX4_IB_QP_LSO = 1 << 0 + MLX4_IB_QP_LSO = 1 << 0, + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; struct mlx4_ib_qp { @@ -198,6 +205,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); +} + static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); @@ -238,6 +250,11 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 68e92485fc7..a4cdb465cd1 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -183,6 +184,76 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, + max_page_list_len, 0, &mr->mmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(dev->dev, &mr->mmr); + if (err) + goto err_mr; + + return &mr->ibmr; + +err_mr: + mlx4_mr_free(dev->dev, &mr->mmr); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof (u64); + + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + + mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx4_ib_dev *dev = to_mdev(page_list->device); + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + int size = page_list->max_page_list_len * sizeof (u64); + + dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list, + mfrpl->map); + kfree(mfrpl); +} + struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8e02ecfec18..f7bc7dd8578 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -78,6 +79,9 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL), + [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), + [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -129,9 +133,10 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) int ind; void *buf; __be32 stamp; + struct mlx4_wqe_ctrl_seg *ctrl; - s = roundup(size, 1U << qp->sq.wqe_shift); if (qp->sq_max_wqes_per_wr > 1) { + s = roundup(size, 1U << qp->sq.wqe_shift); for (i = 0; i < s; i += 64) { ind = (i >> qp->sq.wqe_shift) + n; stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : @@ -141,7 +146,8 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) *wqe = stamp; } } else { - buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + s = (ctrl->fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); @@ -333,6 +339,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + send_wqe_overhead(type, qp->flags); + if (s > dev->dev->caps.max_sq_desc_sz) + return -EINVAL; + /* * Hermon supports shrinking WQEs, such that a single work * request can include multiple units of 1 << wqe_shift. This @@ -372,9 +381,6 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); for (;;) { - if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz) - return -EINVAL; - qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); /* @@ -395,7 +401,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, ++qp->sq.wqe_shift; } - qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) - + qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, + (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); @@ -411,7 +418,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_send_wr = qp->sq.max_post = (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; - cap->max_send_sge = qp->sq.max_gs; + cap->max_send_sge = min(qp->sq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); /* We don't support inline sends for kernel QPs (yet) */ cap->max_inline_data = 0; @@ -449,19 +458,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->rq.lock); qp->state = IB_QPS_RESET; - qp->atomic_rd_en = 0; - qp->resp_depth = 0; - - qp->rq.head = 0; - qp->rq.tail = 0; - qp->sq.head = 0; - qp->sq.tail = 0; - qp->sq_next_wqe = 0; - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - else - qp->sq_signal_bits = 0; err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); if (err) @@ -506,6 +504,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; @@ -679,10 +680,15 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp; int err; - /* We only support LSO, and only for kernel UD QPs. */ - if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO) + /* + * We only support LSO and multicast loopback blocking, and + * only for kernel UD QPs. + */ + if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) return ERR_PTR(-EINVAL); - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO && + + if (init_attr->create_flags && (pd->uobject || init_attr->qp_type != IB_QPT_UD)) return ERR_PTR(-EINVAL); @@ -691,7 +697,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: { - qp = kmalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); @@ -712,7 +718,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (pd->uobject) return ERR_PTR(-EINVAL); - sqp = kmalloc(sizeof *sqp, GFP_KERNEL); + sqp = kzalloc(sizeof *sqp, GFP_KERNEL); if (!sqp) return ERR_PTR(-ENOMEM); @@ -903,7 +909,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, attr->path_mtu); goto out; } - context->mtu_msgmax = (attr->path_mtu << 5) | 31; + context->mtu_msgmax = (attr->path_mtu << 5) | + ilog2(dev->dev->caps.max_msg_sz); } if (qp->rq.wqe_cnt) @@ -973,6 +980,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + /* Set "fast registration enabled" for all kernel QPs */ + if (!qp->ibqp.uobject) + context->params1 |= cpu_to_be32(1 << 11); + if (attr_mask & IB_QP_RNR_RETRY) { context->params1 |= cpu_to_be32(attr->rnr_retry << 13); optpar |= MLX4_QP_OPTPAR_RNR_RETRY; @@ -1060,6 +1071,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); + if (qp->sq_max_wqes_per_wr == 1) + ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } @@ -1124,23 +1137,6 @@ out: return err; } -static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 }; -static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = { - [IB_QPT_UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [IB_QPT_UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), -}; - int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -1183,15 +1179,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { - err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr, - mlx4_ib_qp_attr_mask_table[ibqp->qp_type], - IB_QPS_RESET, IB_QPS_INIT); - if (err) - goto out; - cur_state = IB_QPS_INIT; - } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: @@ -1343,6 +1330,38 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= wq->max_post; } +static __be32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | + cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); +} + +static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +{ + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + + fseg->flags = convert_access(wr->wr.fast_reg.access_flags); + fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); + fseg->buf_list = cpu_to_be64(mfrpl->map); + fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->offset = 0; /* XXX -- is this just for ZBVA? */ + fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->reserved[0] = 0; + fseg->reserved[1] = 0; +} + +static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) +{ + iseg->flags = 0; + iseg->mem_key = cpu_to_be32(rkey); + iseg->guest_id = 0; + iseg->pa = 0; +} + static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { @@ -1416,7 +1435,7 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len) { unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); @@ -1444,6 +1463,21 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, return 0; } +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1457,7 +1491,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned ind; int uninitialized_var(stamp); int uninitialized_var(size); - unsigned seglen; + unsigned uninitialized_var(seglen); int i; spin_lock_irqsave(&qp->sq.lock, flags); @@ -1490,11 +1524,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; - if (wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) - ctrl->imm = wr->ex.imm_data; - else - ctrl->imm = 0; + ctrl->imm = send_ieth(wr); wqe += sizeof *ctrl; size = sizeof *ctrl / 16; @@ -1526,6 +1556,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; + case IB_WR_LOCAL_INV: + set_local_inv_seg(wqe, wr->ex.invalidate_rkey); + wqe += sizeof (struct mlx4_wqe_local_inval_seg); + size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; + break; + + case IB_WR_FAST_REG_MR: + set_fmr_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_fmr_seg); + size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + break; + default: /* No extra segments required for sends */ break; @@ -1862,6 +1904,13 @@ done: qp_init_attr->cap = qp_attr->cap; + qp_init_attr->create_flags = 0; + if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + if (qp->flags & MLX4_IB_QP_LSO) + qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + out: mutex_unlock(&qp->mutex); return err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 12d6bc6f800..d42565258fb 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index e2d11be4525..13beedeeef9 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index a7630670961..c5ccc2daab6 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/errno.h> diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 4b111a852ff..32f6c631545 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_av.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/string.h> diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index e948158a28d..cc440f90000 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <linux/jiffies.h> @@ -128,7 +126,6 @@ static void handle_catas(struct mthca_dev *dev) static void poll_catas(unsigned long dev_ptr) { struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; - unsigned long flags; int i; for (i = 0; i < dev->catas_err.size; ++i) @@ -137,13 +134,8 @@ static void poll_catas(unsigned long dev_ptr) return; } - spin_lock_irqsave(&catas_lock, flags); - if (!dev->catas_err.stop) - mod_timer(&dev->catas_err.timer, - jiffies + MTHCA_CATAS_POLL_INTERVAL); - spin_unlock_irqrestore(&catas_lock, flags); - - return; + mod_timer(&dev->catas_err.timer, + round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL)); } void mthca_start_catas_poll(struct mthca_dev *dev) @@ -151,7 +143,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev) unsigned long addr; init_timer(&dev->catas_err.timer); - dev->catas_err.stop = 0; dev->catas_err.map = NULL; addr = pci_resource_start(dev->pdev, 0) + @@ -182,10 +173,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev) void mthca_stop_catas_poll(struct mthca_dev *dev) { - spin_lock_irq(&catas_lock); - dev->catas_err.stop = 1; - spin_unlock_irq(&catas_lock); - del_timer_sync(&dev->catas_err.timer); if (dev->catas_err.map) { diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 54d230ee7d6..c33e1c53c79 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/completion.h> diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 8928ca4a932..6efd3265f24 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cmd.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_CMD_H diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h index afa56bfaab2..75671f75cac 100644 --- a/drivers/infiniband/hw/mthca/mthca_config_reg.h +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_CONFIG_REG_H diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 20401d2ba6b..d9f4735c2b3 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_cq.c 1369 2004-12-20 16:17:07Z roland $ */ #include <linux/hardirq.h> @@ -622,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev, case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; entry->opcode = IB_WC_RECV; break; case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; + entry->ex.imm_data = cqe->imm_etype_pkey_eec; entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; break; default: diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bc32f8e377..252590116df 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_DEV_H @@ -204,6 +202,7 @@ struct mthca_pd_table { struct mthca_buddy { unsigned long **bits; + int *num_free; int max_order; spinlock_t lock; }; @@ -279,7 +278,6 @@ struct mthca_mcg_table { struct mthca_catas_err { u64 addr; u32 __iomem *map; - unsigned long stop; u32 size; struct timer_list timer; struct list_head list; diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index b374dc395be..14f51ef97d7 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/types.h> diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8bde7f98e58..cc6858f0b65 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_eq.c 1382 2004-12-24 02:21:02Z roland $ */ #include <linux/errno.h> @@ -782,7 +780,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) return -ENOMEM; dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { __free_page(dev->eq_table.icm_page); return -ENOMEM; } diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 8b7e83e6e88..640449582ab 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/string.h> diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 9ebadd6e0cf..fb9f91b60f3 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_main.c 1396 2004-12-28 04:10:27Z roland $ */ #include <linux/module.h> @@ -45,6 +43,7 @@ #include "mthca_cmd.h" #include "mthca_profile.h" #include "mthca_memfree.h" +#include "mthca_wqe.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); @@ -200,7 +199,18 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) mdev->limits.gid_table_len = dev_lim->max_gids; mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; - mdev->limits.max_sg = dev_lim->max_sg; + /* + * Need to allow for worst case send WQE overhead and check + * whether max_desc_sz imposes a lower limit than max_sg; UD + * send has the biggest overhead. + */ + mdev->limits.max_sg = min_t(int, dev_lim->max_sg, + (dev_lim->max_desc_sz - + sizeof (struct mthca_next_seg) - + (mthca_is_memfree(mdev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg))) / + sizeof (struct mthca_data_seg)); mdev->limits.max_wqes = dev_lim->max_qp_sz; mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index a8ad072be07..3f5f9487920 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mcg.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/string.h> diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index b224079d4e1..1f7d1a29d2a 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <linux/mm.h> @@ -109,7 +107,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m { struct page *page; - page = alloc_pages(gfp_mask, order); + /* + * Use __GFP_ZERO because buggy firmware assumes ICM pages are + * cleared, and subtle failures are seen if they aren't. + */ + page = alloc_pages(gfp_mask | __GFP_ZERO, order); if (!page) return -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index a1ab06847b7..da9b8f9b884 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #ifndef MTHCA_MEMFREE_H diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 820205dec56..882e6b73591 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/slab.h> @@ -91,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) spin_lock(&buddy->lock); - for (o = order; o <= buddy->max_order; ++o) { - m = 1 << (buddy->max_order - o); - seg = find_first_bit(buddy->bits[o], m); - if (seg < m) - goto found; - } + for (o = order; o <= buddy->max_order; ++o) + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } spin_unlock(&buddy->lock); return -1; found: clear_bit(seg, buddy->bits[o]); + --buddy->num_free[o]; while (o > order) { --o; seg <<= 1; set_bit(seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; } spin_unlock(&buddy->lock); @@ -125,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; seg >>= 1; ++order; } set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; spin_unlock(&buddy->lock); } @@ -143,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - if (!buddy->bits) + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { @@ -156,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) } set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; return 0; @@ -163,9 +169,10 @@ err_out_free: for (i = 0; i <= buddy->max_order; ++i) kfree(buddy->bits[i]); +err_out: kfree(buddy->bits); + kfree(buddy->num_free); -err_out: return -ENOMEM; } @@ -177,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy) kfree(buddy->bits[i]); kfree(buddy->bits); + kfree(buddy->num_free); } static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index c1e950764bd..266f14e4740 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_pd.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/errno.h> diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 605a8d57fac..d168c254061 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_profile.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/module.h> diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h index e76cb62d8e3..62b009cc873 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.h +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -29,8 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_profile.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_PROFILE_H diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index be34f99ca62..87ad889e367 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -32,8 +32,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_provider.c 4859 2006-01-09 21:55:10Z roland $ */ #include <rdma/ib_smi.h> diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 934bf954403..c621f8794b8 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -30,8 +30,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_provider.h 1349 2004-12-16 21:09:43Z roland $ */ #ifndef MTHCA_PROVIDER_H diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 09dc3614cf2..f5081bfde6d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -31,8 +31,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $ */ #include <linux/string.h> @@ -850,23 +848,6 @@ out: return err; } -static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 }; -static const int dummy_init_attr_mask[] = { - [IB_QPT_UD] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_QKEY), - [IB_QPT_UC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_RC] = (IB_QP_PKEY_INDEX | - IB_QP_PORT | - IB_QP_ACCESS_FLAGS), - [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), - [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | - IB_QP_QKEY), -}; - int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -928,15 +909,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, goto out; } - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) { - err = __mthca_modify_qp(ibqp, &dummy_init_attr, - dummy_init_attr_mask[ibqp->qp_type], - IB_QPS_RESET, IB_QPS_INIT); - if (err) - goto out; - cur_state = IB_QPS_INIT; - } - err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); out: @@ -1277,10 +1249,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, return -EINVAL; /* - * For MLX transport we need 2 extra S/G entries: + * For MLX transport we need 2 extra send gather entries: * one for the header and one for the checksum at the end */ - if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg) + if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 91934f2d9db..acb6817f606 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_reset.c 1349 2004-12-16 21:09:43Z roland $ */ #include <linux/init.h> diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index a5ffff6e102..4fabe62aab8 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ */ #include <linux/slab.h> diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c index 8b728486410..ca5900c96fc 100644 --- a/drivers/infiniband/hw/mthca/mthca_uar.c +++ b/drivers/infiniband/hw/mthca/mthca_uar.c @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id$ */ #include <asm/page.h> /* PAGE_SHIFT */ diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index e1262c942db..5fe56e81073 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -29,7 +29,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * */ #ifndef MTHCA_USER_H diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h index b3551a8dea1..341a5ae881c 100644 --- a/drivers/infiniband/hw/mthca/mthca_wqe.h +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -28,8 +28,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $ */ #ifndef MTHCA_WQE_H diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 9f7364a9096..b0cab64e5e3 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -91,10 +91,6 @@ unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); -unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; -module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR); -MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation"); - LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); @@ -280,6 +276,7 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r } nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); + nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; kfree(nesqp->allocated_buffer); } @@ -293,7 +290,6 @@ void nes_rem_ref(struct ib_qp *ibqp) struct nes_qp *nesqp; struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; u32 opcode; @@ -307,8 +303,6 @@ void nes_rem_ref(struct ib_qp *ibqp) } if (atomic_dec_and_test(&nesqp->refcount)) { - nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; - /* Destroy the QP */ cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { @@ -332,7 +326,7 @@ void nes_rem_ref(struct ib_qp *ibqp) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); u64temp = (u64)nesqp->nesqp_context_pbase; set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); } } diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 1f9f7bf7386..39bd897b40c 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -94,9 +94,6 @@ #define MAX_DPC_ITERATIONS 128 -#define NES_CQP_REQUEST_NO_DOORBELL_RING 0 -#define NES_CQP_REQUEST_RING_DOORBELL 1 - #define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 #define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 #define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 @@ -173,7 +170,6 @@ extern int disable_mpa_crc; extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; -extern unsigned int nes_lro_max_aggr; extern struct list_head nes_adapter_list; @@ -539,7 +535,11 @@ void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); -void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request); +void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *); int nes_arp_table(struct nes_device *, u32, u8 *, u32); void nes_mh_fix(unsigned long); void nes_clc(unsigned long); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9a4b40fae40..9f0b964b2c9 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -74,36 +74,59 @@ atomic_t cm_nodes_destroyed; atomic_t cm_accel_dropped_pkts; atomic_t cm_resets_recvd; -static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); +static inline int mini_cm_accelerated(struct nes_cm_core *, + struct nes_cm_node *); static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, - struct nes_vnic *, struct nes_cm_info *); -static int add_ref_cm_node(struct nes_cm_node *); -static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + struct nes_vnic *, struct nes_cm_info *); static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); -static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, - void *, u32, void *, u32, u8); -static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); - static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, - struct nes_vnic *, - struct ietf_mpa_frame *, - struct nes_cm_info *); + struct nes_vnic *, u16, void *, struct nes_cm_info *); +static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); + struct nes_cm_node *); static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); -static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, - struct sk_buff *); + struct nes_cm_node *); +static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, + struct sk_buff *); static int mini_cm_dealloc_core(struct nes_cm_core *); static int mini_cm_get(struct nes_cm_core *); static int mini_cm_set(struct nes_cm_core *, u32, u32); + +static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, + void *, u32, void *, u32, u8); +static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); +static int add_ref_cm_node(struct nes_cm_node *); +static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + static int nes_cm_disconn_true(struct nes_qp *); static int nes_cm_post_event(struct nes_cm_event *event); static int nes_disconnect(struct nes_qp *nesqp, int abrupt); static void nes_disconnect_worker(struct work_struct *work); -static int send_ack(struct nes_cm_node *cm_node); + +static int send_mpa_request(struct nes_cm_node *, struct sk_buff *); +static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); +static int send_reset(struct nes_cm_node *, struct sk_buff *); +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); +static void process_packet(struct nes_cm_node *, struct sk_buff *, + struct nes_cm_core *); + +static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void cleanup_retrans_entry(struct nes_cm_node *); +static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *, + enum nes_cm_event_type); +static void free_retrans_entry(struct nes_cm_node *cm_node); +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive); + +/* CM event handler functions */ +static void cm_event_connected(struct nes_cm_event *); +static void cm_event_connect_error(struct nes_cm_event *); +static void cm_event_reset(struct nes_cm_event *); +static void cm_event_mpa_req(struct nes_cm_event *); + +static void print_core(struct nes_cm_core *core); /* External CM API Interface */ /* instance of function pointers for client API */ @@ -158,11 +181,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, event->cm_info.loc_port = cm_node->loc_port; event->cm_info.cm_id = cm_node->cm_id; - nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x]," - " src_addr=%08x[%x]\n", - event, type, - event->cm_info.loc_addr, event->cm_info.loc_port, - event->cm_info.rem_addr, event->cm_info.rem_port); + nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " + "dst_addr=%08x[%x], src_addr=%08x[%x]\n", + cm_node, event, type, event->cm_info.loc_addr, + event->cm_info.loc_port, event->cm_info.rem_addr, + event->cm_info.rem_port); nes_cm_post_event(event); return event; @@ -172,14 +195,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, /** * send_mpa_request */ -static int send_mpa_request(struct nes_cm_node *cm_node) +static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) { - struct sk_buff *skb; int ret; - - skb = get_free_pkt(cm_node); if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + nes_debug(NES_DBG_CM, "skb set to NULL\n"); return -1; } @@ -188,9 +208,8 @@ static int send_mpa_request(struct nes_cm_node *cm_node) cm_node->mpa_frame_size, SET_ACK); ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - if (ret < 0) { + if (ret < 0) return ret; - } return 0; } @@ -229,46 +248,12 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) /** - * handle_exception_pkt - process an exception packet. - * We have been in a TSA state, and we have now received SW - * TCP/IP traffic should be a FIN request or IP pkt with options - */ -static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); - - /* first check to see if this a FIN pkt */ - if (tcph->fin) { - /* we need to ACK the FIN request */ - send_ack(cm_node); - - /* check which side we are (client/server) and set next state accordingly */ - if (cm_node->tcp_cntxt.client) - cm_node->state = NES_CM_STATE_CLOSING; - else { - /* we are the server side */ - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - /* since this is a self contained CM we don't wait for */ - /* an APP to close us, just send final FIN immediately */ - ret = send_fin(cm_node, NULL); - cm_node->state = NES_CM_STATE_LAST_ACK; - } - } else { - ret = -EINVAL; - } - - return ret; -} - - -/** * form_cm_frame - get a free packet and build empty frame Use * node info to build. */ -static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, - void *options, u32 optionsize, void *data, - u32 datasize, u8 flags) +static struct sk_buff *form_cm_frame(struct sk_buff *skb, + struct nes_cm_node *cm_node, void *options, u32 optionsize, + void *data, u32 datasize, u8 flags) { struct tcphdr *tcph; struct iphdr *iph; @@ -332,10 +317,12 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm cm_node->tcp_cntxt.loc_seq_num++; tcph->syn = 1; } else - cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */ + cm_node->tcp_cntxt.loc_seq_num += datasize; - if (flags & SET_FIN) + if (flags & SET_FIN) { + cm_node->tcp_cntxt.loc_seq_num++; tcph->fin = 1; + } if (flags & SET_RST) tcph->rst = 1; @@ -389,7 +376,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, int close_when_complete) { unsigned long flags; - struct nes_cm_core *cm_core; + struct nes_cm_core *cm_core = cm_node->cm_core; struct nes_timer_entry *new_send; int ret = 0; u32 was_timer_set; @@ -411,7 +398,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, new_send->close_when_complete = close_when_complete; if (type == NES_TIMER_TYPE_CLOSE) { - new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */ + new_send->timetosend += (HZ/10); spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_add_tail(&new_send->list, &cm_node->recv_list); spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); @@ -420,36 +407,28 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, if (type == NES_TIMER_TYPE_SEND) { new_send->seq_num = ntohl(tcp_hdr(skb)->seq); atomic_inc(&new_send->skb->users); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + cm_node->send_entry = new_send; + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); if (ret != NETDEV_TX_OK) { - nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n", - new_send, jiffies); + nes_debug(NES_DBG_CM, "Error sending packet %p " + "(jiffies = %lu)\n", new_send, jiffies); atomic_dec(&new_send->skb->users); new_send->timetosend = jiffies; } else { cm_packets_sent++; if (!send_retrans) { + cleanup_retrans_entry(cm_node); if (close_when_complete) - rem_ref_cm_node(cm_node->cm_core, cm_node); - dev_kfree_skb_any(new_send->skb); - kfree(new_send); + rem_ref_cm_node(cm_core, cm_node); return ret; } - new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->retrans_list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - } - if (type == NES_TIMER_TYPE_RECV) { - new_send->seq_num = ntohl(tcp_hdr(skb)->seq); - new_send->timetosend = jiffies; - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->recv_list); - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); } - cm_core = cm_node->cm_core; was_timer_set = timer_pending(&cm_core->tcp_timer); @@ -476,23 +455,27 @@ static void nes_cm_timer_tick(unsigned long pass) struct list_head *list_node, *list_node_temp; struct nes_cm_core *cm_core = g_cm_core; struct nes_qp *nesqp; - struct sk_buff *skb; u32 settimer = 0; int ret = NETDEV_TX_OK; - int node_done; + enum nes_cm_node_state last_state; spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + list_for_each_safe(list_node, list_core_temp, + &cm_core->connected_nodes) { cm_node = container_of(list_node, struct nes_cm_node, list); add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); - if ((time_after(recv_entry->timetosend, jiffies)) && - (recv_entry->type == NES_TIMER_TYPE_CLOSE)) { - if (nexttimeout > recv_entry->timetosend || !settimer) { + list_for_each_safe(list_core, list_node_temp, + &cm_node->recv_list) { + recv_entry = container_of(list_core, + struct nes_timer_entry, list); + if (!recv_entry) + break; + if (time_after(recv_entry->timetosend, jiffies)) { + if (nexttimeout > recv_entry->timetosend || + !settimer) { nexttimeout = recv_entry->timetosend; settimer = 1; } @@ -501,157 +484,143 @@ static void nes_cm_timer_tick(unsigned long pass) list_del(&recv_entry->list); cm_id = cm_node->cm_id; spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: " - "****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:" - " ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); - } - if (cm_id) - cm_id->rem_ref(cm_id); + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with something " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, + qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, + qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); } + if (cm_id) + cm_id->rem_ref(cm_id); + kfree(recv_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); } spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - node_done = 0; - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - if (node_done) { - break; - } - send_entry = container_of(list_core, struct nes_timer_entry, list); + do { + send_entry = cm_node->send_entry; + if (!send_entry) + continue; if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != NES_CM_STATE_TSA) { - if ((nexttimeout > send_entry->timetosend) || !settimer) { - nexttimeout = send_entry->timetosend; + if ((nexttimeout > + send_entry->timetosend) || + !settimer) { + nexttimeout = + send_entry->timetosend; settimer = 1; + continue; } - node_done = 1; - continue; } else { - list_del(&send_entry->list); - skb = send_entry->skb; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); continue; } } - if (send_entry->type == NES_TIMER_NODE_CLEANUP) { - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) || - (cm_node->state == NES_CM_STATE_TSA) || - (cm_node->state == NES_CM_STATE_CLOSED)) { - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - dev_kfree_skb_any(skb); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + + if ((cm_node->state == NES_CM_STATE_TSA) || + (cm_node->state == NES_CM_STATE_CLOSED)) { + free_retrans_entry(cm_node); continue; } - if (!send_entry->retranscount || !send_entry->retrycount) { + if (!send_entry->retranscount || + !send_entry->retrycount) { cm_packets_dropped++; - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - if (cm_node->state == NES_CM_STATE_SYN_RCVD) { - /* this node never even generated an indication up to the cm */ + last_state = cm_node->state; + cm_node->state = NES_CM_STATE_CLOSED; + free_retrans_entry(cm_node); + spin_unlock_irqrestore( + &cm_node->retrans_list_lock, flags); + if (last_state == NES_CM_STATE_SYN_RCVD) rem_ref_cm_node(cm_core, cm_node); - } else { - cm_node->state = NES_CM_STATE_CLOSED; - create_event(cm_node, NES_CM_EVENT_ABORTED); - } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + else + create_event(cm_node, + NES_CM_EVENT_ABORTED); + spin_lock_irqsave(&cm_node->retrans_list_lock, + flags); continue; } - /* this seems like the correct place, but leave send entry unprotected */ - /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */ atomic_inc(&send_entry->skb->users); cm_packets_retrans++; - nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," - " jiffies = %lu, time to send = %lu, retranscount = %u, " - "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n", - send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount, - send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num); - - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " + "for node %p, jiffies = %lu, time to send = " + "%lu, retranscount = %u, send_entry->seq_num = " + "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " + "0x%08X\n", send_entry, cm_node, jiffies, + send_entry->timetosend, + send_entry->retranscount, + send_entry->seq_num, + cm_node->tcp_cntxt.rem_ack_num); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, + flags); ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for " + "node=%p\n", cm_node); cm_packets_bounced++; atomic_dec(&send_entry->skb->users); send_entry->retrycount--; nexttimeout = jiffies + NES_SHORT_TIME; settimer = 1; - node_done = 1; - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); continue; } else { cm_packets_sent++; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_del(&send_entry->list); - nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n", - send_entry->retranscount, send_entry->retrycount); + nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " + "%u, retry count = %u.\n", + send_entry->retranscount, + send_entry->retrycount); if (send_entry->send_retrans) { send_entry->retranscount--; - send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT; - if (nexttimeout > send_entry->timetosend || !settimer) { + send_entry->timetosend = jiffies + + NES_RETRY_TIMEOUT; + if (nexttimeout > send_entry->timetosend || + !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } - list_add(&send_entry->list, &cm_node->retrans_list); - continue; } else { int close_when_complete; - skb = send_entry->skb; - close_when_complete = send_entry->close_when_complete; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - if (close_when_complete) { - BUG_ON(atomic_read(&cm_node->ref_count) == 1); - rem_ref_cm_node(cm_core, cm_node); - } - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + close_when_complete = + send_entry->close_when_complete; + nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", + cm_node, cm_node->state); + free_retrans_entry(cm_node); + if (close_when_complete) + rem_ref_cm_node(cm_node->cm_core, + cm_node); } - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - - rem_ref_cm_node(cm_core, cm_node); + } while (0); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + rem_ref_cm_node(cm_node->cm_core, cm_node); spin_lock_irqsave(&cm_core->ht_lock, flags); - if (ret != NETDEV_TX_OK) + if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n", + cm_node); break; + } } spin_unlock_irqrestore(&cm_core->ht_lock, flags); @@ -667,14 +636,14 @@ static void nes_cm_timer_tick(unsigned long pass) /** * send_syn */ -static int send_syn(struct nes_cm_node *cm_node, u32 sendack) +static int send_syn(struct nes_cm_node *cm_node, u32 sendack, + struct sk_buff *skb) { int ret; int flags = SET_SYN; - struct sk_buff *skb; char optionsbuffer[sizeof(struct option_mss) + - sizeof(struct option_windowscale) + - sizeof(struct option_base) + 1]; + sizeof(struct option_windowscale) + sizeof(struct option_base) + + TCP_OPTIONS_PADDING]; int optionssize = 0; /* Sending MSS option */ @@ -695,8 +664,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; optionssize += sizeof(struct option_windowscale); - if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) - ) { + if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) { options = (union all_known_options *)&optionsbuffer[optionssize]; options->as_base.optionnum = OPTION_NUMBER_WRITE0; options->as_base.length = sizeof(struct option_base); @@ -714,7 +682,8 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_end = OPTION_NUMBER_END; optionssize += 1; - skb = get_free_pkt(cm_node); + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; @@ -733,18 +702,18 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) /** * send_reset */ -static int send_reset(struct nes_cm_node *cm_node) +static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); int flags = SET_RST | SET_ACK; + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; } - add_ref_cm_node(cm_node); form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); @@ -755,10 +724,12 @@ static int send_reset(struct nes_cm_node *cm_node) /** * send_ack */ -static int send_ack(struct nes_cm_node *cm_node) +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); + + if (!skb) + skb = get_free_pkt(cm_node); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); @@ -922,7 +893,8 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node if (!cm_node || !cm_core) return -EINVAL; - nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n"); + nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", + cm_node); /* first, make an index into our hash table */ hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr, @@ -946,10 +918,35 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node * mini_cm_dec_refcnt_listen */ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener, int free_hanging_nodes) + struct nes_cm_listener *listener, int free_hanging_nodes) { int ret = 1; unsigned long flags; + struct list_head *list_pos = NULL; + struct list_head *list_temp = NULL; + struct nes_cm_node *cm_node = NULL; + + nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " + "refcnt=%d\n", listener, free_hanging_nodes, + atomic_read(&listener->ref_count)); + /* free non-accelerated child nodes for this listener */ + if (free_hanging_nodes) { + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_safe(list_pos, list_temp, + &g_cm_core->connected_nodes) { + cm_node = container_of(list_pos, struct nes_cm_node, + list); + if ((cm_node->listener == listener) && + (!cm_node->accelerated)) { + cleanup_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, + flags); + send_reset(cm_node, NULL); + spin_lock_irqsave(&cm_core->ht_lock, flags); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + } spin_lock_irqsave(&cm_core->listen_list_lock, flags); if (!atomic_dec_return(&listener->ref_count)) { list_del(&listener->list); @@ -1067,18 +1064,18 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->send_write0 = send_first; - nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n", - HIPQUAD(cm_node->loc_addr), cm_node->loc_port, - HIPQUAD(cm_node->rem_addr), cm_node->rem_port); + nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT + ":%x, rem = " NIPQUAD_FMT ":%x\n", + HIPQUAD(cm_node->loc_addr), cm_node->loc_port, + HIPQUAD(cm_node->rem_addr), cm_node->rem_port); cm_node->listener = listener; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); - nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", - cm_node->listener, cm_node->cm_id); + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, + cm_node->cm_id); - INIT_LIST_HEAD(&cm_node->retrans_list); spin_lock_init(&cm_node->retrans_list_lock); INIT_LIST_HEAD(&cm_node->recv_list); spin_lock_init(&cm_node->recv_list_lock); @@ -1142,10 +1139,9 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node) * rem_ref_cm_node - destroy an instance of a cm node */ static int rem_ref_cm_node(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) + struct nes_cm_node *cm_node) { unsigned long flags, qplockflags; - struct nes_timer_entry *send_entry; struct nes_timer_entry *recv_entry; struct iw_cm_id *cm_id; struct list_head *list_core, *list_node_temp; @@ -1169,48 +1165,33 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, atomic_dec(&cm_node->listener->pend_accepts_cnt); BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } - - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - send_entry = container_of(list_core, struct nes_timer_entry, list); - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(send_entry->skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - + BUG_ON(cm_node->send_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); + recv_entry = container_of(list_core, struct nes_timer_entry, + list); list_del(&recv_entry->list); cm_id = cm_node->cm_id; spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id); - nes_rem_ref(&nesqp->ibqp); - } - cm_id->rem_ref(cm_id); - } else if (recv_entry->type == NES_TIMER_TYPE_RECV) { - dev_kfree_skb_any(recv_entry->skb); + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " + "NES_TIMER_TYPE_CLOSE with something to do!\n", + nesqp->hwqp.qp_id, cm_id); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing to do!\n", + nesqp->hwqp.qp_id, cm_id); } + cm_id->rem_ref(cm_id); + kfree(recv_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); } @@ -1221,23 +1202,31 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, } else { if (cm_node->apbvt_set && cm_node->nesvnic) { nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, - PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); + PCI_FUNC( + cm_node->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); } } - kfree(cm_node); atomic_dec(&cm_core->node_cnt); atomic_inc(&cm_nodes_destroyed); + nesqp = cm_node->nesqp; + if (nesqp) { + nesqp->cm_node = NULL; + nes_rem_ref(&nesqp->ibqp); + cm_node->nesqp = NULL; + } + cm_node->freed = 1; + kfree(cm_node); return 0; } - /** * process_options */ -static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet) +static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, + u32 optionsize, u32 syn_packet) { u32 tmp; u32 offset = 0; @@ -1247,35 +1236,37 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti while (offset < optionsize) { all_options = (union all_known_options *)(optionsloc + offset); switch (all_options->as_base.optionnum) { - case OPTION_NUMBER_END: - offset = optionsize; - break; - case OPTION_NUMBER_NONE: - offset += 1; - continue; - case OPTION_NUMBER_MSS: - nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n", - __func__, - all_options->as_mss.length, offset, optionsize); - got_mss_option = 1; - if (all_options->as_mss.length != 4) { - return 1; - } else { - tmp = ntohs(all_options->as_mss.mss); - if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss) - cm_node->tcp_cntxt.mss = tmp; - } - break; - case OPTION_NUMBER_WINDOW_SCALE: - cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount; - break; - case OPTION_NUMBER_WRITE0: - cm_node->send_write0 = 1; - break; - default: - nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", - all_options->as_base.optionnum); - break; + case OPTION_NUMBER_END: + offset = optionsize; + break; + case OPTION_NUMBER_NONE: + offset += 1; + continue; + case OPTION_NUMBER_MSS: + nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " + "Size: %d\n", __func__, + all_options->as_mss.length, offset, optionsize); + got_mss_option = 1; + if (all_options->as_mss.length != 4) { + return 1; + } else { + tmp = ntohs(all_options->as_mss.mss); + if (tmp > 0 && tmp < + cm_node->tcp_cntxt.mss) + cm_node->tcp_cntxt.mss = tmp; + } + break; + case OPTION_NUMBER_WINDOW_SCALE: + cm_node->tcp_cntxt.snd_wscale = + all_options->as_windowscale.shiftcount; + break; + case OPTION_NUMBER_WRITE0: + cm_node->send_write0 = 1; + break; + default: + nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", + all_options->as_base.optionnum); + break; } offset += all_options->as_base.length; } @@ -1284,300 +1275,491 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti return 0; } +static void drop_packet(struct sk_buff *skb) +{ + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); +} -/** - * process_packet - */ -static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct nes_cm_core *cm_core) +static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) { - int optionsize; - int datasize; - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); - u32 inc_sequence; - if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) { - inc_sequence = ntohl(tcph->seq); - cm_node->tcp_cntxt.rcv_nxt = inc_sequence; + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " + "refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_MPAREQ_SENT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, skb); + break; + case NES_CM_STATE_FIN_WAIT1: + cm_node->state = NES_CM_STATE_CLOSING; + send_ack(cm_node, skb); + break; + case NES_CM_STATE_FIN_WAIT2: + cm_node->state = NES_CM_STATE_TIME_WAIT; + send_ack(cm_node, skb); + cm_node->state = NES_CM_STATE_CLOSED; + break; + case NES_CM_STATE_TSA: + default: + nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n", + cm_node, cm_node->state); + drop_packet(skb); + break; } +} - if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) { - BUG_ON(!tcph); - atomic_inc(&cm_accel_dropped_pkts); - return -1; - } - if (tcph->rst) { - atomic_inc(&cm_resets_recvd); - nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n", - cm_node, cm_node->state, atomic_read(&cm_node->ref_count)); - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_TSA: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_SYN_RCVD: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state); - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - default: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u refcnt=%d\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state, atomic_read(&cm_node->ref_count)); - /* create event */ - cm_node->state = NES_CM_STATE_CLOSED; +static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ - create_event(cm_node, NES_CM_EVENT_ABORTED); - break; + int reset = 0; /* whether to send reset in case of err.. */ + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." + " refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, reset); + break; + /* For PASSIVE open states, remove the cm_node event */ + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_LISTENING: + nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__); + passive_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_TSA: + default: + break; + } +} +static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb, + enum nes_cm_event_type type) +{ + + int ret; + int datasize = skb->len; + u8 *dataloc = skb->data; + ret = parse_mpa(cm_node, dataloc, datasize); + if (ret < 0) { + nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); + if (type == NES_CM_EVENT_CONNECTED) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for " + "cm_node=%p listener=%p state=%d\n", __func__, + __LINE__, cm_node, cm_node->listener, + cm_node->state); + active_open_err(cm_node, skb, 1); + } else { + passive_open_err(cm_node, skb, 1); } - return -1; + } else { + cleanup_retrans_entry(cm_node); + dev_kfree_skb_any(skb); + if (type == NES_CM_EVENT_CONNECTED) + cm_node->state = NES_CM_STATE_TSA; + create_event(cm_node, type); + + } + return ; +} + +static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_TSA: + default: + drop_packet(skb); } +} + +static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err; + + err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1; + if (err) + active_open_err(cm_node, skb, 1); + + return err; +} + +static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err = 0; + u32 seq; + u32 ack_seq; + u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; + u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; + u32 rcv_wnd; + seq = ntohl(tcph->seq); + ack_seq = ntohl(tcph->ack_seq); + rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; + if (ack_seq != loc_seq_num) + err = 1; + else if ((seq + rcv_wnd) < rcv_nxt) + err = 1; + if (err) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + indicate_pkt_err(cm_node, skb); + nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " + "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, + rcv_wnd); + } + return err; +} + +/* + * handle_syn_pkt() is for Passive node. The syn packet is received when a node + * is created with a listener or it may comein as rexmitted packet which in + * that case will be just dropped. + */ + +static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int ret; + u32 inc_sequence; + int optionsize; optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); - skb_pull(skb, ip_hdr(skb)->ihl << 2); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + /* Rcvd syn on active open connection*/ + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_LISTENING: + /* Passive OPEN */ + cm_node->accept_pend = 1; + atomic_inc(&cm_node->listener->pend_accepts_cnt); + if (atomic_read(&cm_node->listener->pend_accepts_cnt) > + cm_node->listener->backlog) { + nes_debug(NES_DBG_CM, "drop syn due to backlog " + "pressure \n"); + cm_backlog_drops++; + passive_open_err(cm_node, skb, 0); + break; + } + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, + 1); + if (ret) { + passive_open_err(cm_node, skb, 0); + /* drop pkt */ + break; + } + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + BUG_ON(cm_node->send_entry); + cm_node->state = NES_CM_STATE_SYN_RCVD; + send_syn(cm_node, 1, skb); + break; + case NES_CM_STATE_TSA: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_CLOSED: + default: + drop_packet(skb); + break; + } +} + +static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + + int ret; + u32 inc_sequence; + int optionsize; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + /* active open */ + if (check_syn(cm_node, tcph, skb)) + return; + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + /* setup options */ + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); + if (ret) { + nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", + cm_node); + break; + } + cleanup_retrans_entry(cm_node); + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + send_mpa_request(cm_node, skb); + cm_node->state = NES_CM_STATE_MPAREQ_SENT; + break; + case NES_CM_STATE_MPAREQ_RCVD: + /* passive open, so should not be here */ + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_MPAREQ_SENT: + default: + drop_packet(skb); + break; + } +} - datasize = skb->len; +static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int datasize = 0; + u32 inc_sequence; + u32 rem_seq_ack; + u32 rem_seq; + if (check_seq(cm_node, tcph, skb)) + return; + + skb_pull(skb, tcph->doff << 2); inc_sequence = ntohl(tcph->seq); - nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X," - " rcv_nxt = 0x%08X Flags: %s %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""), - (tcph->ack ? "ACK":"")); - - if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt) - ) { - nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X," - " ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":"")); - if (cm_node->state == NES_CM_STATE_LISTENING) { - rem_ref_cm_node(cm_core, cm_node); + rem_seq = ntohl(tcph->seq); + rem_seq_ack = ntohl(tcph->ack_seq); + datasize = skb->len; + + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + /* Passive OPEN */ + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + cm_node->state = NES_CM_STATE_ESTABLISHED; + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_MPA_REQ); + } else { /* rcvd ACK only */ + dev_kfree_skb_any(skb); + cleanup_retrans_entry(cm_node); + } + break; + case NES_CM_STATE_ESTABLISHED: + /* Passive OPEN */ + /* We expect mpa frame to be received only */ + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + handle_rcv_mpa(cm_node, skb, + NES_CM_EVENT_MPA_REQ); + } else + drop_packet(skb); + break; + case NES_CM_STATE_MPAREQ_SENT: + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_CONNECTED); + } else { /* Could be just an ack pkt.. */ + cleanup_retrans_entry(cm_node); + dev_kfree_skb_any(skb); } - return -1; + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + default: + drop_packet(skb); + break; } +} - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive) +{ + u8 *optionsloc = (u8 *)&tcph[1]; if (optionsize) { - u8 *optionsloc = (u8 *)&tcph[1]; - if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) { - nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node); - send_reset(cm_node); - if (cm_node->state != NES_CM_STATE_SYN_SENT) - rem_ref_cm_node(cm_core, cm_node); - return 0; + if (process_options(cm_node, optionsloc, optionsize, + (u32)tcph->syn)) { + nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", + __func__, cm_node); + if (passive) + passive_open_err(cm_node, skb, 0); + else + active_open_err(cm_node, skb, 0); + return 1; } - } else if (tcph->syn) - cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; + } cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << cm_node->tcp_cntxt.snd_wscale; - if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) { + if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; - } + return 0; +} - if (tcph->ack) { - cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - /* read and stash current sequence number */ - if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) { - nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !=" - " cm_node->tcp_cntxt.loc_seq_num\n"); - send_reset(cm_node); - return 0; - } - if (cm_node->state == NES_CM_STATE_SYN_SENT) - cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED; - else { - cm_node->state = NES_CM_STATE_ESTABLISHED; - } - break; - case NES_CM_STATE_LAST_ACK: - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_FIN_WAIT2; - break; - case NES_CM_STATE_CLOSING: - cm_node->state = NES_CM_STATE_TIME_WAIT; - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_LISTENING: - nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); - /* send_reset bumps refcount, this should have been a new node */ - rem_ref_cm_node(cm_core, cm_node); - return -1; - break; - case NES_CM_STATE_TSA: - nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n"); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_FIN_WAIT2: - default: - nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n", - cm_node->state); - send_reset(cm_node); - break; - } - } +/* + * active_open_err() will send reset() if flag set.. + * It will also send ABORT event. + */ - if (tcph->syn) { - if (cm_node->state == NES_CM_STATE_LISTENING) { - /* do not exceed backlog */ - atomic_inc(&cm_node->listener->pend_accepts_cnt); - if (atomic_read(&cm_node->listener->pend_accepts_cnt) > - cm_node->listener->backlog) { - nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n"); - cm_backlog_drops++; - atomic_dec(&cm_node->listener->pend_accepts_cnt); - rem_ref_cm_node(cm_core, cm_node); - return 0; - } - cm_node->accept_pend = 1; +static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + if (reset) { + nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " + "state=%d\n", cm_node, cm_node->state); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + } else + dev_kfree_skb_any(skb); - } - if (datasize == 0) - cm_node->tcp_cntxt.rcv_nxt ++; + cm_node->state = NES_CM_STATE_CLOSED; + create_event(cm_node, NES_CM_EVENT_ABORTED); +} - if (cm_node->state == NES_CM_STATE_LISTENING) { - cm_node->state = NES_CM_STATE_SYN_RCVD; - send_syn(cm_node, 1); - } - if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) { - cm_node->state = NES_CM_STATE_ESTABLISHED; - /* send final handshake ACK */ - ret = send_ack(cm_node); - if (ret < 0) - return ret; +/* + * passive_open_err() will either do a reset() or will free up the skb and + * remove the cm_node. + */ - cm_node->state = NES_CM_STATE_MPAREQ_SENT; - ret = send_mpa_request(cm_node); - if (ret < 0) - return ret; - } +static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + if (reset) { + nes_debug(NES_DBG_CM, "passive_open_err sending RST for " + "cm_node=%p state =%d\n", cm_node, cm_node->state); + send_reset(cm_node, skb); + } else { + dev_kfree_skb_any(skb); + rem_ref_cm_node(cm_node->cm_core, cm_node); } +} - if (tcph->fin) { - cm_node->tcp_cntxt.rcv_nxt++; - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - cm_node->state = NES_CM_STATE_LAST_ACK; - ret = send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_CLOSING; - ret = send_ack(cm_node); - break; - case NES_CM_STATE_FIN_WAIT2: - cm_node->state = NES_CM_STATE_TIME_WAIT; - cm_node->tcp_cntxt.loc_seq_num ++; - ret = send_ack(cm_node); - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - case NES_CM_STATE_TSA: - default: - nes_debug(NES_DBG_CM, "Received a fin while in %x state\n", - cm_node->state); - ret = -EINVAL; - break; - } +/* + * free_retrans_entry() routines assumes that the retrans_list_lock has + * been acquired before calling. + */ +static void free_retrans_entry(struct nes_cm_node *cm_node) +{ + struct nes_timer_entry *send_entry; + send_entry = cm_node->send_entry; + if (send_entry) { + cm_node->send_entry = NULL; + dev_kfree_skb_any(send_entry->skb); + kfree(send_entry); + rem_ref_cm_node(cm_node->cm_core, cm_node); } +} - if (datasize) { - u8 *dataloc = skb->data; - /* figure out what state we are in and handle transition to next state */ - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - break; - case NES_CM_STATE_MPAREQ_SENT: - /* recv the mpa res frame, ret=frame len (incl priv data) */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) - break; - /* set the req frame payload len in skb */ - /* we are done handling this state, set node to a TSA state */ - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - create_event(cm_node, NES_CM_EVENT_CONNECTED); - break; - - case NES_CM_STATE_ESTABLISHED: - /* we are expecting an MPA req frame */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) { - break; - } - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - /* we got a valid MPA request, create an event */ - create_event(cm_node, NES_CM_EVENT_MPA_REQ); - break; - case NES_CM_STATE_TSA: - handle_exception_pkt(cm_node, skb); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - default: - ret = -1; - } - } +static void cleanup_retrans_entry(struct nes_cm_node *cm_node) +{ + unsigned long flags; - return ret; + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } +/** + * process_packet + * Returns skb if to be freed, else it will return NULL if already used.. + */ +static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct nes_cm_core *cm_core) +{ + enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; + struct tcphdr *tcph = tcp_hdr(skb); + skb_pull(skb, ip_hdr(skb)->ihl << 2); + + nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " + "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, + tcph->ack, tcph->rst, tcph->fin); + + if (tcph->rst) + pkt_type = NES_PKT_TYPE_RST; + else if (tcph->syn) { + pkt_type = NES_PKT_TYPE_SYN; + if (tcph->ack) + pkt_type = NES_PKT_TYPE_SYNACK; + } else if (tcph->fin) + pkt_type = NES_PKT_TYPE_FIN; + else if (tcph->ack) + pkt_type = NES_PKT_TYPE_ACK; + + switch (pkt_type) { + case NES_PKT_TYPE_SYN: + handle_syn_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_SYNACK: + handle_synack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_ACK: + handle_ack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_RST: + handle_rst_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_FIN: + handle_fin_pkt(cm_node, skb, tcph); + break; + default: + drop_packet(skb); + break; + } +} /** * mini_cm_listen - create a listen node with params */ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; unsigned long flags; @@ -1603,7 +1785,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, return NULL; } - memset(listener, 0, sizeof(struct nes_cm_listener)); listener->loc_addr = htonl(cm_info->loc_addr); listener->loc_port = htons(cm_info->loc_port); listener->reused_node = 0; @@ -1645,37 +1826,36 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, /** * mini_cm_connect - make a connection node with params */ -static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_info *cm_info) +struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, u16 private_data_len, + void *private_data, struct nes_cm_info *cm_info) { int ret = 0; struct nes_cm_node *cm_node; struct nes_cm_listener *loopbackremotelistener; struct nes_cm_node *loopbackremotenode; struct nes_cm_info loopback_cm_info; - - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); - - cm_info->loc_addr = htonl(cm_info->loc_addr); - cm_info->rem_addr = htonl(cm_info->rem_addr); - cm_info->loc_port = htons(cm_info->loc_port); - cm_info->rem_port = htons(cm_info->rem_port); + u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len; + struct ietf_mpa_frame *mpa_frame = NULL; /* create a CM connection node */ cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); if (!cm_node) return NULL; + mpa_frame = &cm_node->mpa_frame; + strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ); + mpa_frame->flags = IETF_MPA_FLAGS_CRC; + mpa_frame->rev = IETF_MPA_VERSION; + mpa_frame->priv_data_len = htons(private_data_len); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; if (cm_info->loc_addr == cm_info->rem_addr) { - loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr, - cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE); + loopbackremotelistener = find_listener(cm_core, + ntohl(nesvnic->local_ipaddr), cm_node->rem_port, + NES_CM_LISTENER_ACTIVE_STATE); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { @@ -1684,26 +1864,35 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; - loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, - loopbackremotelistener); + loopbackremotenode = make_cm_node(cm_core, nesvnic, + &loopback_cm_info, loopbackremotelistener); loopbackremotenode->loopbackpartner = cm_node; - loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + loopbackremotenode->tcp_cntxt.rcv_wscale = + NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->loopbackpartner = loopbackremotenode; - memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data, - mpa_frame_size); - loopbackremotenode->mpa_frame_size = mpa_frame_size - - sizeof(struct ietf_mpa_frame); + memcpy(loopbackremotenode->mpa_frame_buf, private_data, + private_data_len); + loopbackremotenode->mpa_frame_size = private_data_len; - /* we are done handling this state, set node to a TSA state */ + /* we are done handling this state. */ + /* set node to a TSA state */ cm_node->state = NES_CM_STATE_TSA; - cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; - loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; - cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale; - loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; + cm_node->tcp_cntxt.rcv_nxt = + loopbackremotenode->tcp_cntxt.loc_seq_num; + loopbackremotenode->tcp_cntxt.rcv_nxt = + cm_node->tcp_cntxt.loc_seq_num; + cm_node->tcp_cntxt.max_snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.max_snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wscale = + loopbackremotenode->tcp_cntxt.rcv_wscale; + loopbackremotenode->tcp_cntxt.snd_wscale = + cm_node->tcp_cntxt.rcv_wscale; create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); } @@ -1713,16 +1902,29 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; /* init our MPA frame ptr */ - memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size); + memcpy(mpa_frame->priv_data, private_data, private_data_len); + cm_node->mpa_frame_size = mpa_frame_size; /* send a syn and goto syn sent state */ cm_node->state = NES_CM_STATE_SYN_SENT; - ret = send_syn(cm_node, 0); + ret = send_syn(cm_node, 0, NULL); + + if (ret) { + /* error in sending the syn free up the cm_node struct */ + nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " + "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + rem_ref_cm_node(cm_node->cm_core, cm_node); + cm_node = NULL; + } - nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x," - " cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id); + if (cm_node) + nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," + "port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); return cm_node; } @@ -1732,8 +1934,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, * mini_cm_accept - accept a connection * This function is never called */ -static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) +static int mini_cm_accept(struct nes_cm_core *cm_core, + struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { return 0; } @@ -1743,32 +1945,26 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mp * mini_cm_reject - reject and teardown a connection */ static int mini_cm_reject(struct nes_cm_core *cm_core, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) + struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { int ret = 0; - struct sk_buff *skb; - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); - skb = get_free_pkt(cm_node); - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; - } - - /* send an MPA Request frame */ - form_cm_frame(skb, cm_node, NULL, 0, mpa_frame, mpa_frame_size, SET_ACK | SET_FIN); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", + __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); + if (cm_node->tcp_cntxt.client) + return ret; + cleanup_retrans_entry(cm_node); cm_node->state = NES_CM_STATE_CLOSED; ret = send_fin(cm_node, NULL); - if (ret < 0) { - printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n"); - return ret; + if (cm_node->accept_pend) { + BUG_ON(!cm_node->listener); + atomic_dec(&cm_node->listener->pend_accepts_cnt); + BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } + ret = send_reset(cm_node, NULL); return ret; } @@ -1784,35 +1980,39 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod return -EINVAL; switch (cm_node->state) { - /* if passed in node is null, create a reference key node for node search */ - /* check if we found an owner node for this pkt */ - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = NES_CM_STATE_FIN_WAIT1; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_CLOSE_WAIT: - cm_node->state = NES_CM_STATE_LAST_ACK; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSING: - ret = -1; - break; - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_CLOSED: - case NES_CM_STATE_TSA: - ret = rem_ref_cm_node(cm_core, cm_node); - break; + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_MPAREQ_SENT: + case NES_CM_STATE_MPAREQ_RCVD: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_CLOSE_WAIT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TIME_WAIT: + case NES_CM_STATE_CLOSING: + ret = -1; + break; + case NES_CM_STATE_LISTENING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + case NES_CM_STATE_CLOSED: + ret = rem_ref_cm_node(cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + if (cm_node->send_entry) + printk(KERN_ERR "ERROR Close got called from STATE_TSA " + "send_entry=%p\n", cm_node->send_entry); + ret = rem_ref_cm_node(cm_core, cm_node); + break; } cm_node->cm_id = NULL; return ret; @@ -1823,25 +2023,30 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod * recv_pkt - recv an ETHERNET packet, and process it through CM * node state machine */ -static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, - struct sk_buff *skb) +static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct sk_buff *skb) { struct nes_cm_node *cm_node = NULL; struct nes_cm_listener *listener = NULL; struct iphdr *iph; struct tcphdr *tcph; struct nes_cm_info nfo; - int ret = 0; - if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { - ret = -EINVAL; - goto out; + if (!skb) + return; + if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { + dev_kfree_skb_any(skb); + return; } iph = (struct iphdr *)skb->data; tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); skb_reset_network_header(skb); skb_set_transport_header(skb, sizeof(*tcph)); + if (!tcph) { + dev_kfree_skb_any(skb); + return; + } skb->len = ntohs(iph->tot_len); nfo.loc_addr = ntohl(iph->daddr); @@ -1854,61 +2059,60 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni NIPQUAD(iph->daddr), tcph->dest, NIPQUAD(iph->saddr), tcph->source); - /* note: this call is going to increment cm_node ref count */ - cm_node = find_node(cm_core, + do { + cm_node = find_node(cm_core, nfo.rem_port, nfo.rem_addr, nfo.loc_port, nfo.loc_addr); - if (!cm_node) { - listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port, - NES_CM_LISTENER_ACTIVE_STATE); - if (listener) { - nfo.cm_id = listener->cm_id; - nfo.conn_type = listener->conn_type; - } else { - nfo.cm_id = NULL; - nfo.conn_type = 0; - } - - cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener); if (!cm_node) { - nes_debug(NES_DBG_CM, "Unable to allocate node\n"); + /* Only type of packet accepted are for */ + /* the PASSIVE open (syn only) */ + if ((!tcph->syn) || (tcph->ack)) { + cm_packets_dropped++; + break; + } + listener = find_listener(cm_core, nfo.loc_addr, + nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); if (listener) { - nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n"); + nfo.cm_id = listener->cm_id; + nfo.conn_type = listener->conn_type; + } else { + nes_debug(NES_DBG_CM, "Unable to find listener " + "for the pkt\n"); + cm_packets_dropped++; + dev_kfree_skb_any(skb); + break; + } + + cm_node = make_cm_node(cm_core, nesvnic, &nfo, + listener); + if (!cm_node) { + nes_debug(NES_DBG_CM, "Unable to allocate " + "node\n"); + cm_packets_dropped++; atomic_dec(&listener->ref_count); + dev_kfree_skb_any(skb); + break; } - ret = -1; - goto out; - } - if (!listener) { - nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n", - nfo.loc_port, atomic_read(&cm_node->ref_count)); - if (!tcph->rst) { - nes_debug(NES_DBG_CM, "Packet found for unknown port=%d" - " rem_port=%d refcnt=%d\n", - nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count)); - - cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); + if (!tcph->rst && !tcph->fin) { + cm_node->state = NES_CM_STATE_LISTENING; + } else { + cm_packets_dropped++; + rem_ref_cm_node(cm_core, cm_node); + dev_kfree_skb_any(skb); + break; } + add_ref_cm_node(cm_node); + } else if (cm_node->state == NES_CM_STATE_TSA) { rem_ref_cm_node(cm_core, cm_node); - ret = -1; - goto out; + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); + break; } - add_ref_cm_node(cm_node); - cm_node->state = NES_CM_STATE_LISTENING; - } - - nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n", - cm_node, skb->data); - process_packet(cm_node, skb, cm_core); - - rem_ref_cm_node(cm_core, cm_node); - out: - if (skb) - dev_kfree_skb_any(skb); - return ret; + process_packet(cm_node, skb, cm_core); + rem_ref_cm_node(cm_core, cm_node); + } while (0); } @@ -2108,15 +2312,12 @@ int nes_cm_disconn(struct nes_qp *nesqp) if (nesqp->disconn_pending == 0) { nesqp->disconn_pending++; spin_unlock_irqrestore(&nesqp->lock, flags); - /* nes_add_ref(&nesqp->ibqp); */ /* init our disconnect work element, to */ INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker); queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work); - } else { + } else spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); - } return 0; } @@ -2162,7 +2363,6 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); return -1; } @@ -2183,30 +2383,31 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) atomic_inc(&cm_disconnects); cm_event.event = IW_CM_EVENT_DISCONNECT; if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) { - issued_disconnect_reset = 1; cm_event.status = IW_CM_EVENT_STATUS_RESET; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for " - " QP%u, cm_id = %p. \n", - nesqp->hwqp.qp_id, cm_id); - } else { + nes_debug(NES_DBG_CM, "Generating a CM " + "Disconnect Event (status reset) for " + "QP%u, cm_id = %p. \n", + nesqp->hwqp.qp_id, cm_id); + } else cm_event.status = IW_CM_EVENT_STATUS_OK; - } cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for " - " QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n", - nesqp->hwqp.qp_id, - nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id, - atomic_read(&nesqp->refcount)); + nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" + " for QP%u, SQ Head = %u, SQ Tail = %u. " + "cm_id = %p, refcount = %u.\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, + nesqp->hwqp.sq_tail, cm_id, + atomic_read(&nesqp->refcount)); spin_unlock_irqrestore(&nesqp->lock, flags); ret = cm_id->event_handler(cm_id, &cm_event); if (ret) - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + nes_debug(NES_DBG_CM, "OFA CM event_handler " + "returned, ret=%d\n", ret); spin_lock_irqsave(&nesqp->lock, flags); } @@ -2248,31 +2449,24 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) if (nesqp->flush_issued == 0) { nesqp->flush_issued = 1; spin_unlock_irqrestore(&nesqp->lock, flags); - flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1); - } else { + flush_wqes(nesvnic->nesdev, nesqp, + NES_CQP_FLUSH_RQ, 1); + } else spin_unlock_irqrestore(&nesqp->lock, flags); - } - - /* This reference is from either ModifyQP or the AE processing, - there is still a race here with modifyqp */ - nes_rem_ref(&nesqp->ibqp); - } else { cm_id = nesqp->cm_id; spin_unlock_irqrestore(&nesqp->lock, flags); /* check to see if the inbound reset beat the outbound reset */ if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) { - nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset" - " beating the outbound reset.\n", - nesqp->hwqp.qp_id); - nes_rem_ref(&nesqp->ibqp); + nes_debug(NES_DBG_CM, "QP%u: Decing refcount " + "due to inbound reset beating the " + "outbound reset.\n", nesqp->hwqp.qp_id); } } } else { nesqp->disconn_pending = 0; spin_unlock_irqrestore(&nesqp->lock, flags); } - nes_rem_ref(&nesqp->ibqp); return 0; } @@ -2350,71 +2544,82 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesdev = nesvnic->nesdev; adapter = nesdev->nesadapter; - nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", - nesvnic, nesvnic->netdev, nesvnic->netdev->name); - - /* since this is from a listen, we were able to put node handle into cm_id */ cm_node = (struct nes_cm_node *)cm_id->provider_data; + nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p," + "%s\n", cm_node, nesvnic, nesvnic->netdev, + nesvnic->netdev->name); /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; + cm_node->nesqp = nesqp; + nes_add_ref(&nesqp->ibqp); - nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n", - nesqp->hwqp.qp_id, cm_node, jiffies); + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", + nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); atomic_inc(&cm_accepts); nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", atomic_read(&nesvnic->netdev->refcnt)); - /* allocate the ietf frame and space for private data */ - nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, - sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, - &nesqp->ietf_frame_pbase); - - if (!nesqp->ietf_frame) { - nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); - return -ENOMEM; - } + /* allocate the ietf frame and space for private data */ + nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, + sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, + &nesqp->ietf_frame_pbase); + if (!nesqp->ietf_frame) { + nes_debug(NES_DBG_CM, "Unable to allocate memory for private " + "data\n"); + return -ENOMEM; + } - /* setup the MPA frame */ - nesqp->private_data_len = conn_param->private_data_len; - memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); + /* setup the MPA frame */ + nesqp->private_data_len = conn_param->private_data_len; + memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len); - nesqp->ietf_frame->rev = mpa_version; - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; + memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); - /* setup our first outgoing iWarp send WQE (the IETF frame response) */ - wqe = &nesqp->hwqp.sq_vbase[0]; + nesqp->ietf_frame->priv_data_len = + cpu_to_be16(conn_param->private_data_len); + nesqp->ietf_frame->rev = mpa_version; + nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) { - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)nesqp->ietf_frame_pbase); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = - cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU); - } else { - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); - } - nesqp->skip_lsmm = 1; + /* setup our first outgoing iWarp send WQE (the IETF frame response) */ + wqe = &nesqp->hwqp.sq_vbase[0]; + + if (cm_id->remote_addr.sin_addr.s_addr != + cm_id->local_addr.sin_addr.s_addr) { + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | + NES_IWARP_SQ_WQE_WRPDU); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)nesqp->ietf_frame_pbase); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = + cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = + cpu_to_le32(conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU); + } else { + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + } + nesqp->skip_lsmm = 1; /* Cache the cm_id in the qp */ @@ -2425,55 +2630,75 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->provider_data = nesqp; nesqp->active_conn = 0; + if (cm_node->state == NES_CM_STATE_TSA) + nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", + cm_node); + nes_cm_init_tsa_conn(nesqp, cm_node); - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); - nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL, + nesqp->nesqp_context->arp_index_vlan |= + cpu_to_le32(nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( - jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)conn_param->ord); memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; - nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; - nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; + nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ crc_value = get_crc_value(&nes_quad); nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); + nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); nesqp->hte_index &= adapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X," - " rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n", - nesqp->hwqp.qp_id, + nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " + "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " + "private data length=%zu.\n", nesqp->hwqp.qp_id, ntohl(cm_id->remote_addr.sin_addr.s_addr), ntohs(cm_id->remote_addr.sin_port), ntohl(cm_id->local_addr.sin_addr.s_addr), ntohs(cm_id->local_addr.sin_port), le32_to_cpu(nesqp->nesqp_context->rcv_nxt), le32_to_cpu(nesqp->nesqp_context->snd_nxt), - conn_param->private_data_len+sizeof(struct ietf_mpa_frame)); + conn_param->private_data_len + + sizeof(struct ietf_mpa_frame)); attr.qp_state = IB_QPS_RTS; nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); @@ -2490,15 +2715,16 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.private_data_len = 0; ret = cm_id->event_handler(cm_id, &cm_event); if (cm_node->loopbackpartner) { - cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len; + cm_node->loopbackpartner->mpa_frame_size = + nesqp->private_data_len; /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data, - nesqp->private_data_len); + memcpy(cm_node->loopbackpartner->mpa_frame_buf, + nesqp->ietf_frame->priv_data, nesqp->private_data_len); create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); } if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); return 0; } @@ -2556,74 +2782,61 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (!nesdev) return -EINVAL; - atomic_inc(&cm_connects); - - nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) + - conn_param->private_data_len, GFP_KERNEL); - if (!nesqp->ietf_frame) - return -ENOMEM; + nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " + "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, + ntohl(nesvnic->local_ipaddr), + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port)); - /* set qp as having an active connection */ + atomic_inc(&cm_connects); nesqp->active_conn = 1; - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", - nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port)); - /* cache the cm_id in the qp */ nesqp->cm_id = cm_id; cm_id->provider_data = nesqp; - /* copy the private data */ - if (conn_param->private_data_len) { - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); - } - nesqp->private_data_len = conn_param->private_data_len; nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); - nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len); - - strcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ); - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - nesqp->ietf_frame->rev = IETF_MPA_VERSION; - nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len); + nes_debug(NES_DBG_CM, "mpa private data len =%u\n", + conn_param->private_data_len); - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + if (cm_id->local_addr.sin_addr.s_addr != + cm_id->remote_addr.sin_addr.s_addr) nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); /* set up the connection params for the node */ - cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr); - cm_info.loc_port = (cm_id->local_addr.sin_port); - cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr); - cm_info.rem_port = (cm_id->remote_addr.sin_port); + cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr); + cm_info.loc_port = htons(cm_id->local_addr.sin_port); + cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr); + cm_info.rem_port = htons(cm_id->remote_addr.sin_port); cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; cm_id->add_ref(cm_id); - nes_add_ref(&nesqp->ibqp); /* create a connect CM node connection */ - cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info); + cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, + conn_param->private_data_len, (void *)conn_param->private_data, + &cm_info); if (!cm_node) { - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + if (cm_id->local_addr.sin_addr.s_addr != + cm_id->remote_addr.sin_addr.s_addr) nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_rem_ref(&nesqp->ibqp); - kfree(nesqp->ietf_frame); - nesqp->ietf_frame = NULL; + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + cm_id->rem_ref(cm_id); return -ENOMEM; } cm_node->apbvt_set = 1; nesqp->cm_node = cm_node; + cm_node->nesqp = nesqp; return 0; } @@ -2665,7 +2878,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { - printk("%s[%u] Error returned from listen API call\n", + printk(KERN_ERR "%s[%u] Error returned from listen API call\n", __func__, __LINE__); return -ENOMEM; } @@ -2673,10 +2886,13 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_node; if (!cm_node->reused_node) { - err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + err = nes_manage_apbvt(nesvnic, + ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); if (err) { - printk("nes_manage_apbvt call returned %d.\n", err); + printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", + err); g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); return err; } @@ -2796,53 +3012,70 @@ static void cm_event_connected(struct nes_cm_event *event) nes_cm_init_tsa_conn(nesqp, cm_node); /* set the QP tsa context */ - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(nesvnic->local_ipaddr)); + else + nesqp->nesqp_context->ip0 = + cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), + nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + cpu_to_le32((u32)1 << + NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); /* Adjust tail for not having a LSMM */ nesqp->hwqp.sq_tail = 1; #if defined(NES_SEND_FIRST_WRITE) - if (cm_node->send_write0) { - nes_debug(NES_DBG_CM, "Sending first write.\n"); - wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - /* use the reserved spot on the WQ for the extra first WQE */ - nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); - nesqp->skip_lsmm = 1; - nesqp->hwqp.sq_tail = 0; - nes_write32(nesdev->regs + NES_WQE_ALLOC, - (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - } + if (cm_node->send_write0) { + nes_debug(NES_DBG_CM, "Sending first write.\n"); + wqe = &nesqp->hwqp.sq_vbase[0]; + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + /* use the reserved spot on the WQ for the extra first WQE */ + nesqp->nesqp_context->ird_ord_sizes &= + cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + nesqp->skip_lsmm = 1; + nesqp->hwqp.sq_tail = 0; + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); + } #endif memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) + nes_quad.SrcIpadr = nesvnic->local_ipaddr; + else + nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; @@ -2859,10 +3092,6 @@ static void cm_event_connected(struct nes_cm_event *event) nesqp->private_data_len = (u8) cm_node->mpa_frame_size; cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - /* modify QP state to rts */ - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - /* notify OF layer we successfully created the requested connection */ cm_event.event = IW_CM_EVENT_CONNECT_REPLY; cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED; @@ -2871,20 +3100,21 @@ static void cm_event_connected(struct nes_cm_event *event) cm_event.local_addr.sin_port = cm_id->local_addr.sin_port; cm_event.remote_addr = cm_id->remote_addr; - cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; + cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n", - nesqp->hwqp.qp_id, jiffies ); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - nes_rem_ref(&nesqp->ibqp); + nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = " + "%lu\n", nesqp->hwqp.qp_id, jiffies); return; } @@ -2928,17 +3158,19 @@ static void cm_event_connect_error(struct nes_cm_event *event) cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n", - cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr); + nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, " + "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); nes_rem_ref(&nesqp->ibqp); - cm_id->rem_ref(cm_id); + cm_id->rem_ref(cm_id); + rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); return; } @@ -3041,7 +3273,8 @@ static int nes_cm_post_event(struct nes_cm_event *event) add_ref_cm_node(event->cm_node); event->cm_info.cm_id->add_ref(event->cm_info.cm_id); INIT_WORK(&event->event_work, nes_cm_event_handler); - nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event); + nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", + event->cm_node, event); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); @@ -3057,46 +3290,48 @@ static int nes_cm_post_event(struct nes_cm_event *event) */ static void nes_cm_event_handler(struct work_struct *work) { - struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work); + struct nes_cm_event *event = container_of(work, struct nes_cm_event, + event_work); struct nes_cm_core *cm_core; - if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) { + if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) return; - } + cm_core = event->cm_node->cm_core; nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", - event, event->type, atomic_read(&cm_core->events_posted)); + event, event->type, atomic_read(&cm_core->events_posted)); switch (event->type) { - case NES_CM_EVENT_MPA_REQ: - cm_event_mpa_req(event); - nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n"); - break; - case NES_CM_EVENT_RESET: - nes_debug(NES_DBG_CM, "CM Event: RESET\n"); - cm_event_reset(event); - break; - case NES_CM_EVENT_CONNECTED: - if ((!event->cm_node->cm_id) || - (event->cm_node->state != NES_CM_STATE_TSA)) { - break; - } - cm_event_connected(event); - nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + case NES_CM_EVENT_MPA_REQ: + cm_event_mpa_req(event); + nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", + event->cm_node); + break; + case NES_CM_EVENT_RESET: + nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", + event->cm_node); + cm_event_reset(event); + break; + case NES_CM_EVENT_CONNECTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state != NES_CM_STATE_TSA)) break; - case NES_CM_EVENT_ABORTED: - if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) { - break; - } - cm_event_connect_error(event); - nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); - break; - case NES_CM_EVENT_DROPPED_PKT: - nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); - break; - default: - nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + cm_event_connected(event); + nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + break; + case NES_CM_EVENT_ABORTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) break; + cm_event_connect_error(event); + nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); + break; + case NES_CM_EVENT_DROPPED_PKT: + nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); + break; + default: + nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + break; } atomic_dec(&cm_core->events_posted); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 7717cb2ab50..367b3d29014 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -83,6 +83,8 @@ enum nes_timer_type { #define SET_FIN 4 #define SET_RST 8 +#define TCP_OPTIONS_PADDING 3 + struct option_base { u8 optionnum; u8 length; @@ -177,6 +179,7 @@ enum nes_cm_node_state { NES_CM_STATE_ESTABLISHED, NES_CM_STATE_ACCEPTING, NES_CM_STATE_MPAREQ_SENT, + NES_CM_STATE_MPAREQ_RCVD, NES_CM_STATE_TSA, NES_CM_STATE_FIN_WAIT1, NES_CM_STATE_FIN_WAIT2, @@ -187,6 +190,16 @@ enum nes_cm_node_state { NES_CM_STATE_CLOSED }; +enum nes_tcpip_pkt_type { + NES_PKT_TYPE_UNKNOWN, + NES_PKT_TYPE_SYN, + NES_PKT_TYPE_SYNACK, + NES_PKT_TYPE_ACK, + NES_PKT_TYPE_FIN, + NES_PKT_TYPE_RST +}; + + /* type of nes connection */ enum nes_cm_conn_type { NES_CM_IWARP_CONN_TYPE, @@ -257,7 +270,9 @@ struct nes_cm_node { struct net_device *netdev; struct nes_cm_node *loopbackpartner; - struct list_head retrans_list; + + struct nes_timer_entry *send_entry; + spinlock_t retrans_list_lock; struct list_head recv_list; spinlock_t recv_list_lock; @@ -276,6 +291,8 @@ struct nes_cm_node { struct nes_vnic *nesvnic; int apbvt_set; int accept_pend; + int freed; + struct nes_qp *nesqp; }; /* structure for client or CM to fill when making CM api calls. */ @@ -366,14 +383,14 @@ struct nes_cm_ops { struct nes_cm_info *); int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *); struct nes_cm_node * (*connect)(struct nes_cm_core *, - struct nes_vnic *, struct ietf_mpa_frame *, + struct nes_vnic *, u16, void *, struct nes_cm_info *); int (*close)(struct nes_cm_core *, struct nes_cm_node *); int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); - int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, + void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); int (*destroy_cm_core)(struct nes_cm_core *); int (*get)(struct nes_cm_core *); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 8dc70f9bad2..1513d4066f1 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -42,6 +42,10 @@ #include "nes.h" +static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; +module_param(nes_lro_max_aggr, uint, 0444); +MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); + static u32 crit_err_count; u32 int_mod_timer_init; u32 int_mod_cq_depth_256; @@ -394,7 +398,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->base_pd = 1; nesadapter->device_cap_flags = - IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW; + IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); @@ -1738,7 +1742,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) jumbomode = 1; nes_nic_init_timer_defaults(nesdev, jumbomode); } - nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; + nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; @@ -2706,39 +2710,11 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) barrier(); cqp_request->request_done = 1; wake_up(&cqp_request->waitq); - if (atomic_dec_and_test(&cqp_request->refcount)) { - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - } else if (cqp_request->callback) { - /* Envoke the callback routine */ - cqp_request->cqp_callback(nesdev, cqp_request); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } + nes_put_cqp_request(nesdev, cqp_request); } else { - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", - cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } + if (cqp_request->callback) + cqp_request->cqp_callback(nesdev, cqp_request); + nes_free_cqp_request(nesdev, cqp_request); } } else { wake_up(&nesdev->cqp.waitq); @@ -2838,7 +2814,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp = *((struct nes_qp **)&context); if (atomic_inc_return(&nesqp->close_timer_started) == 1) { nesqp->cm_id->add_ref(nesqp->cm_id); - nes_add_ref(&nesqp->ibqp); schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d)," @@ -2862,7 +2837,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if (async_event_id == NES_AEQE_AEID_RESET_SENT) { tcp_state = NES_AEQE_TCP_STATE_CLOSED; } - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; @@ -2900,7 +2874,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, } spin_unlock_irqrestore(&nesqp->lock, flags); if (next_iwarp_state) { - nes_add_ref(&nesqp->ibqp); nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X," " also added another reference\n", nesqp->hwqp.qp_id, next_iwarp_state); @@ -2912,7 +2885,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, /* FIN Received but ib state not RTS, close complete will be on its way */ spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); return; } spin_unlock_irqrestore(&nesqp->lock, flags); @@ -2946,7 +2918,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || ((nesqp->ibqp_state == IB_QPS_RTS)&& (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); } else { nesqp->in_disconnect = 0; @@ -2955,7 +2926,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, break; case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: nesqp = *((struct nes_qp **)&context); - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; @@ -3066,7 +3036,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: @@ -3086,7 +3055,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: @@ -3106,7 +3074,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); } /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); break; /* TODO: additional AEs need to be here */ @@ -3145,7 +3112,6 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, { struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_cqp_wqe *cqp_wqe; - unsigned long flags; struct nes_cqp_request *cqp_request; int ret = 0; u16 major_code; @@ -3172,7 +3138,7 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); if (add_port == NES_MANAGE_APBVT_ADD) ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -3180,15 +3146,9 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) @@ -3248,7 +3208,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); atomic_set(&cqp_request->refcount, 1); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); } @@ -3258,7 +3218,6 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, u32 which_wq, u32 wait_completion) { - unsigned long flags; struct nes_cqp_request *cqp_request; struct nes_hw_cqp_wqe *cqp_wqe; int ret; @@ -3281,7 +3240,7 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); if (wait_completion) { /* Wait for CQP */ @@ -3290,14 +3249,6 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," " CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); } } diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 745bf94f3f0..7b81e0ae007 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1172,7 +1172,7 @@ struct nes_vnic { u32 mcrq_qp_id; struct nes_ucontext *mcrq_ucontext; struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev); - void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *, int); + void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *); int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr ); struct net_device_stats netstats; /* used to put the netdev on the adapters logical port list */ diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index fe83d1b2b17..fb8cbd71a2e 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -567,12 +567,36 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) return cqp_request; } +void nes_free_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + unsigned long flags; + + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", + cqp_request, + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); + + if (cqp_request->dynamic) { + kfree(cqp_request); + } else { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } +} + +void nes_put_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request) +{ + if (atomic_dec_and_test(&cqp_request->refcount)) + nes_free_cqp_request(nesdev, cqp_request); +} /** * nes_post_cqp_request */ void nes_post_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request, int ring_doorbell) + struct nes_cqp_request *cqp_request) { struct nes_hw_cqp_wqe *cqp_wqe; unsigned long flags; @@ -600,10 +624,9 @@ void nes_post_cqp_request(struct nes_device *nesdev, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, cqp_request->waiting, atomic_read(&cqp_request->refcount)); barrier(); - if (ring_doorbell) { - /* Ring doorbell (1 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); - } + + /* Ring doorbell (1 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); barrier(); } else { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 99b3c4ae86e..d79942e8497 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -55,7 +55,6 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); * nes_alloc_mw */ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { - unsigned long flags; struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); struct nes_device *nesdev = nesvnic->nesdev; @@ -119,7 +118,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -128,15 +127,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { " CQP Major:Minor codes = 0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); kfree(nesmr); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); if (!ret) { @@ -144,17 +135,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { } else { return ERR_PTR(-ENOMEM); } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); nesmr->ibmw.rkey = stag; nesmr->mode = IWNES_MEMREG_TYPE_MW; @@ -178,7 +160,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; int err = 0; - unsigned long flags; int ret; /* Deallocate the window with the adapter */ @@ -194,7 +175,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", @@ -204,32 +185,12 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u," " CQP Major:Minor codes = 0x%04X:0x%04X.\n", ret, cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - if (!ret) { - err = -ETIME; - } else { - err = -EIO; - } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - } + if (!ret) + err = -ETIME; + else if (cqp_request->major_code) + err = -EIO; + + nes_put_cqp_request(nesdev, cqp_request); nes_free_resource(nesadapter, nesadapter->allocated_mrs, (ibmw->rkey & 0x0fffff00) >> 8); @@ -516,7 +477,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), @@ -526,29 +487,11 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, stag, ret, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); ret = (!ret) ? -ETIME : -EIO; goto failed_leaf_vpbl_pages_alloc; - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } - + nes_put_cqp_request(nesdev, cqp_request); nesfmr->nesmr.ibfmr.lkey = stag; nesfmr->nesmr.ibfmr.rkey = stag; nesfmr->attr = *ibfmr_attr; @@ -1474,7 +1417,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", @@ -1487,15 +1430,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, cqp_request->major_code, cqp_request->minor_code); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); nes_free_qp_mem(nesdev, nesqp,virt_wqs); kfree(nesqp->allocated_buffer); @@ -1504,18 +1439,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } else { return ERR_PTR(-EIO); } - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); + if (ibpd->uobject) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; uresp.actual_sq_size = sq_size; @@ -1817,7 +1744,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", @@ -1827,32 +1754,15 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", nescq->hw_cq.cq_number, ret); if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-EIO); - } else { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); if (context) { /* free the nespbl */ @@ -1931,7 +1841,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", @@ -1942,37 +1852,18 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) " CQP Major:Minor codes = 0x%04X:0x%04X.\n", nescq->hw_cq.cq_number, ret, cqp_request->major_code, cqp_request->minor_code); - if ((!ret) || (cqp_request->major_code)) { - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } - if (!ret) { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", + if (!ret) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n", nescq->hw_cq.cq_number); - ret = -ETIME; - } else { - nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", + ret = -ETIME; + } else if (cqp_request->major_code) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n", nescq->hw_cq.cq_number); - ret = -EIO; - } + ret = -EIO; } else { ret = 0; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } } + nes_put_cqp_request(nesdev, cqp_request); if (nescq->cq_mem_size) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, @@ -2096,7 +1987,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, barrier(); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), @@ -2105,15 +1996,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, " CQP Major:Minor codes = 0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) @@ -2456,10 +2340,8 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) goto enough_pages; if ((page_count&0x01FF) == 0) { - if (page_count>(1024*512)) { + if (page_count >= 1024 * 512) { ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); kfree(nesmr); @@ -2756,7 +2638,7 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); @@ -2773,15 +2655,9 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) major_code = cqp_request->major_code; minor_code = cqp_request->minor_code; - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) { nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag," " ib_mr=%p, rkey = 0x%08X\n", @@ -2906,7 +2782,6 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, /* struct iw_cm_id *cm_id = nesqp->cm_id; */ /* struct iw_cm_event cm_event; */ struct nes_cqp_request *cqp_request; - unsigned long flags; int ret; u16 major_code; @@ -2934,7 +2809,7 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ if (wait_completion) { @@ -2952,15 +2827,9 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, nesqp->hwqp.qp_id, cqp_request->major_code, cqp_request->minor_code, next_iwarp_state); } - if (atomic_dec_and_test(&cqp_request->refcount)) { - if (cqp_request->dynamic) { - kfree(cqp_request); - } else { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - } + + nes_put_cqp_request(nesdev, cqp_request); + if (!ret) return -ETIME; else if (major_code) @@ -2998,7 +2867,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state, nesqp->iwarp_state, atomic_read(&nesqp->refcount)); - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X," @@ -3013,7 +2881,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -3024,7 +2891,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -3035,14 +2901,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->cm_id == NULL) { nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n", nesqp->hwqp.qp_id ); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS; @@ -3060,7 +2924,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return 0; } else { if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { @@ -3068,7 +2931,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " ignored due to current iWARP state\n", nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) { @@ -3100,7 +2962,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ @@ -3113,7 +2974,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_RESET: if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", @@ -3139,7 +2999,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, break; default: spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; break; } @@ -3219,7 +3078,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); /* this one is for the cm_disconnect thread */ - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; @@ -3228,14 +3086,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } else { nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); } } else { spin_lock_irqsave(&nesqp->lock, qplockflags); if (nesqp->cm_id) { /* These two are for the timer thread */ if (atomic_inc_return(&nesqp->close_timer_started) == 1) { - nes_add_ref(&nesqp->ibqp); nesqp->cm_id->add_ref(nesqp->cm_id); nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," " need ae to finish up, original_last_aeq = 0x%04X." @@ -3259,14 +3115,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } } else { nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } err = 0; |