Diffstat (limited to 'drivers/infiniband/hw')
56 files changed, 1828 insertions, 1029 deletions
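The most pervasive change in this diff is the conversion of the amso1100, cxgb3, ehca and ipath drivers from enum ib_cq_notify to enum ib_cq_notify_flags, with their req_notify_cq/arm_cq handlers taught to honor IB_CQ_REPORT_MISSED_EVENTS. A minimal consumer-side sketch of the resulting re-arm pattern, assuming only the ib_poll_cq()/ib_req_notify_cq() verbs and a hypothetical my_drain_and_rearm() helper (neither the helper nor its use appears in the patch itself), looks like this:

#include <rdma/ib_verbs.h>

/*
 * Sketch only -- not part of the patch below.  Drain a CQ and re-arm it
 * with the new notify-flags interface; a positive return from
 * ib_req_notify_cq() with IB_CQ_REPORT_MISSED_EVENTS set means the CQ
 * was not empty when re-armed, so poll again before sleeping.
 */
static int my_drain_and_rearm(struct ib_cq *cq)
{
	struct ib_wc wc;
	int n = 0;

	do {
		while (ib_poll_cq(cq, 1, &wc) > 0)
			n++;	/* dispatch the completion here */
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				  IB_CQ_REPORT_MISSED_EVENTS) > 0);

	return n;
}

Without the flag, the converted drivers below simply return 0 on a successful re-arm; with it, they report whether work remained on the queue (c2_arm_cq checks c2_mq_empty(), ehca checks ipz_qeit_is_valid(), ipath compares queue head and tail), which is what lets a caller avoid the race between its last poll and the arm.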
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 59243d9aedd..58bc272bd40 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) } /* Setup the skb for reuse since we're dropping this pkt */ - elem->skb->tail = elem->skb->data = elem->skb->head; + elem->skb->data = elem->skb->head; + skb_reset_tail_pointer(elem->skb); /* Zero out the rxp hdr in the sk_buff */ memset(elem->skb->data, 0, sizeof(*rxp_hdr)); @@ -521,9 +522,8 @@ static void c2_rx_interrupt(struct net_device *netdev) * "sizeof(struct c2_rxp_hdr)". */ skb->data += sizeof(*rxp_hdr); - skb->tail = skb->data + buflen; + skb_set_tail_pointer(skb, buflen); skb->len = buflen; - skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); netif_rx(skb); diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h index 04a9db5de88..fa58200217a 100644 --- a/drivers/infiniband/hw/amso1100/c2.h +++ b/drivers/infiniband/hw/amso1100/c2.h @@ -519,7 +519,7 @@ extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq); extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index); extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index); extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); +extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); /* CM */ extern int c2_llp_connect(struct iw_cm_id *cm_id, diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index 5175c99ee58..d2b3366786d 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -217,17 +217,19 @@ int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) return npolled; } -int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { struct c2_mq_shared __iomem *shared; struct c2_cq *cq; + unsigned long flags; + int ret = 0; cq = to_c2cq(ibcq); shared = cq->mq.peer; - if (notify == IB_CQ_NEXT_COMP) + if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type); - else if (notify == IB_CQ_SOLICITED) + else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type); else return -EINVAL; @@ -241,7 +243,13 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) */ readb(&shared->armed); - return 0; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + spin_lock_irqsave(&cq->lock, flags); + ret = !c2_mq_empty(&cq->mq); + spin_unlock_irqrestore(&cq->lock, flags); + } + + return ret; } static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index fef97275291..109166223c0 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -290,7 +290,7 @@ static int c2_destroy_qp(struct ib_qp *ib_qp) return 0; } -static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, +static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *context, struct ib_udata *udata) { @@ -795,8 +795,8 @@ int c2_register_device(struct c2_dev *dev) 
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); dev->ibdev.phys_port_cnt = 1; + dev->ibdev.num_comp_vectors = 1; dev->ibdev.dma_device = &dev->pcidev->dev; - dev->ibdev.class_dev.dev = &dev->pcidev->dev; dev->ibdev.query_device = c2_query_device; dev->ibdev.query_port = c2_query_port; dev->ibdev.modify_port = c2_modify_port; diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 818cf1aee8c..76049afc765 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -114,7 +114,10 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, return -EIO; } } + + return 1; } + return 0; } @@ -498,9 +501,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) u64 sge_cmd, ctx0, ctx1; u64 base_addr; struct t3_modify_qp_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - + struct sk_buff *skb; + skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); if (!skb) { PDBG("%s alloc_skb failed\n", __FUNCTION__); return -ENOMEM; @@ -508,7 +511,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) err = cxio_hal_init_ctrl_cq(rdev_p); if (err) { PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err); - return err; + goto err; } rdev_p->ctrl_qp.workq = dma_alloc_coherent( &(rdev_p->rnic_info.pdev->dev), @@ -518,7 +521,8 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) GFP_KERNEL); if (!rdev_p->ctrl_qp.workq) { PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__); - return -ENOMEM; + err = -ENOMEM; + goto err; } pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping, rdev_p->ctrl_qp.dma_addr); @@ -556,6 +560,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); skb->priority = CPL_PRIORITY_CONTROL; return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); +err: + kfree_skb(skb); + return err; } static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 90d7b8972cb..ff7290eacef 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -38,6 +38,7 @@ #include "firmware_exports.h" #define T3_MAX_SGE 4 +#define T3_MAX_INLINE 64 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index d0ed1d35ca3..b2faff5abce 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -477,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) BUG_ON(skb_cloned(skb)); mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb->end) { + if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { kfree_skb(skb); skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); if (!skb) { @@ -507,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) */ skb_get(skb); set_arp_failure_handler(skb, arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); @@ -559,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) skb_get(skb); skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, arp_failure_discard); - 
skb->h.raw = skb->data; + skb_reset_transport_header(skb); req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); @@ -610,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) */ skb_get(skb); set_arp_failure_handler(skb, arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); @@ -821,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) /* * copy the new data into our accumulation buffer. */ - memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); ep->mpa_pkt_len += skb->len; /* @@ -940,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) /* * Copy the new data into our accumulation buffer. */ - memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); ep->mpa_pkt_len += skb->len; /* @@ -1107,6 +1109,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p\n", __FUNCTION__, ep); + /* + * We get 2 abort replies from the HW. The first one must + * be ignored except for scribbling that we need one more. + */ + if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) { + ep->flags |= ABORT_REQ_IN_PROGRESS; + return CPL_RET_BUF_DONE; + } + close_complete_upcall(ep); state_set(&ep->com, DEAD); release_ep_resources(ep); @@ -1187,6 +1198,7 @@ static int listen_stop(struct iwch_listen_ep *ep) } req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->cpu_idx = 0; OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); skb->priority = 1; ep->com.tdev->send(ep->com.tdev, skb); @@ -1473,6 +1485,15 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) int ret; int state; + /* + * We get 2 peer aborts from the HW. The first one must + * be ignored except for scribbling that we need one more. 
+ */ + if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) { + ep->flags |= PEER_ABORT_IN_PROGRESS; + return CPL_RET_BUF_DONE; + } + if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid); @@ -1619,7 +1640,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p\n", __FUNCTION__, ep); skb_pull(skb, sizeof(struct cpl_rdma_terminate)); PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len); - memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len); + skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, + skb->len); ep->com.qp->attr.terminate_msg_len = skb->len; ep->com.qp->attr.is_terminate_local = 0; return CPL_RET_BUF_DONE; @@ -2026,6 +2048,17 @@ static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return 0; } +static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb); + + if (rpl->status != CPL_ERR_NONE) { + printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u " + "for tid %u\n", rpl->status, GET_TID(rpl)); + } + return CPL_RET_BUF_DONE; +} + int __init iwch_cm_init(void) { skb_queue_head_init(&rxq); @@ -2053,6 +2086,7 @@ int __init iwch_cm_init(void) t3c_handlers[CPL_ABORT_REQ_RSS] = sched; t3c_handlers[CPL_RDMA_TERMINATE] = sched; t3c_handlers[CPL_RDMA_EC_STATUS] = sched; + t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl; /* * These are the real handlers that are called from a diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 0c6f281bd4a..21a388c313c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -143,6 +143,11 @@ enum iwch_ep_state { DEAD, }; +enum iwch_ep_flags { + PEER_ABORT_IN_PROGRESS = (1 << 0), + ABORT_REQ_IN_PROGRESS = (1 << 1), +}; + struct iwch_ep_common { struct iw_cm_id *cm_id; struct iwch_qp *qp; @@ -181,6 +186,7 @@ struct iwch_ep { u16 plen; u32 ird; u32 ord; + u32 flags; }; static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index f2774ae906b..a891493fd34 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -139,7 +139,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) return 0; } -static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, +static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *ib_context, struct ib_udata *udata) { @@ -292,7 +292,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) #endif } -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct iwch_dev *rhp; struct iwch_cq *chp; @@ -303,7 +303,7 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) chp = to_iwch_cq(ibcq); rhp = chp->rhp; - if (notify == IB_CQ_SOLICITED) + if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) cq_op = CQ_ARM_SE; else cq_op = CQ_ARM_AN; @@ -317,9 +317,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr); err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); spin_unlock_irqrestore(&chp->lock, flag); - if (err) + if (err < 0) printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); + if (err > 0 && !(flags & 
IB_CQ_REPORT_MISSED_EVENTS)) + err = 0; return err; } @@ -545,11 +547,14 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, php = to_iwch_pd(pd); if (mr_rereg_mask & IB_MR_REREG_ACCESS) mh.attr.perms = iwch_ib_to_tpt_access(acc); - if (mr_rereg_mask & IB_MR_REREG_TRANS) + if (mr_rereg_mask & IB_MR_REREG_TRANS) { ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, &total_size, &npages, &shift, &page_list); + if (ret) + return ret; + } ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); kfree(page_list); @@ -777,6 +782,9 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, if (rqsize > T3_MAX_RQ_SIZE) return ERR_PTR(-EINVAL); + if (attrs->cap.max_inline_data > T3_MAX_INLINE) + return ERR_PTR(-EINVAL); + /* * NOTE: The SQ and total WQ sizes don't need to be * a power of two. However, all the code assumes @@ -1104,8 +1112,8 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.node_type = RDMA_NODE_RNIC; memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; + dev->ibdev.num_comp_vectors = 1; dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); - dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev); dev->ibdev.query_device = iwch_query_device; dev->ibdev.query_port = iwch_query_port; dev->ibdev.modify_port = iwch_modify_port; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 0a472c9b44d..714dddbc9a9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -471,43 +471,62 @@ int iwch_bind_mw(struct ib_qp *qp, return err; } -static void build_term_codes(int t3err, u8 *layer_type, u8 *ecode, int tagged) +static inline void build_term_codes(struct respQ_msg_t *rsp_msg, + u8 *layer_type, u8 *ecode) { - switch (t3err) { + int status = TPT_ERR_INTERNAL_ERR; + int tagged = 0; + int opcode = -1; + int rqtype = 0; + int send_inv = 0; + + if (rsp_msg) { + status = CQE_STATUS(rsp_msg->cqe); + opcode = CQE_OPCODE(rsp_msg->cqe); + rqtype = RQ_TYPE(rsp_msg->cqe); + send_inv = (opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV); + tagged = (opcode == T3_RDMA_WRITE) || + (rqtype && (opcode == T3_READ_RESP)); + } + + switch (status) { case TPT_ERR_STAG: - if (tagged == 1) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_INV_STAG; - } else if (tagged == 2) { + if (send_inv) { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_INV_STAG; } break; case TPT_ERR_PDID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + if ((opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV)) + *ecode = RDMAP_CANT_INV_STAG; + else + *ecode = RDMAP_STAG_NOT_ASSOC; + break; case TPT_ERR_QPID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_STAG_NOT_ASSOC; + break; case TPT_ERR_ACCESS: - if (tagged == 1) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_STAG_NOT_ASSOC; - } else if (tagged == 2) { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_STAG_NOT_ASSOC; - } + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_ACC_VIOL; break; case TPT_ERR_WRAP: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_TO_WRAP; break; case TPT_ERR_BOUND: - if (tagged == 1) { + if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; - } else if (tagged == 2) { + } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = 
RDMAP_BASE_BOUNDS; - } else { - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_MSG_TOOBIG; } break; case TPT_ERR_INVALIDATE_SHARED_MR: @@ -591,8 +610,6 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) { union t3_wr *wqe; struct terminate_message *term; - int status; - int tagged = 0; struct sk_buff *skb; PDBG("%s %d\n", __FUNCTION__, __LINE__); @@ -610,17 +627,7 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) /* immediate data starts here. */ term = (struct terminate_message *)wqe->send.sgl; - if (rsp_msg) { - status = CQE_STATUS(rsp_msg->cqe); - if (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE) - tagged = 1; - if ((CQE_OPCODE(rsp_msg->cqe) == T3_READ_REQ) || - (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) - tagged = 2; - } else { - status = TPT_ERR_INTERNAL_ERR; - } - build_term_codes(status, &term->layer_etype, &term->ecode, tagged); + build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); build_fw_riwrh((void *)wqe, T3_WR_SEND, T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1, qhp->ep->hwtid, 5); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 82ded44c6ce..10fb8fbafa0 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -106,6 +106,7 @@ struct ehca_shca { struct ehca_mr *maxmr; struct ehca_pd *pd; struct h_galpas galpas; + struct mutex modify_mutex; }; struct ehca_pd { diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index e2cdc1a16fe..67f0670fe3b 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -113,7 +113,7 @@ struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) return ret; } -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 30eb45df9f0..32b55a4f0e5 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev, break; } + props->port_cap_flags = rblock->capability_mask; props->gid_tbl_len = rblock->gid_tbl_len; props->max_msg_sz = rblock->max_msg_sz; props->bad_pkey_cntr = rblock->bad_pkey_cntr; @@ -236,10 +237,60 @@ query_gid1: return ret; } +const u32 allowed_port_caps = ( + IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP); + int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, struct ib_port_modify *props) { - /* Not implemented yet */ - return -EFAULT; + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct hipz_query_port *rblock; + u32 cap; + u64 hret; + + if ((props->set_port_cap_mask | props->clr_port_cap_mask) + & ~allowed_port_caps) { + ehca_err(&shca->ib_device, "Non-changeable bits set in masks " + "set=%x clr=%x allowed=%x", props->set_port_cap_mask, + props->clr_port_cap_mask, allowed_port_caps); + return -EINVAL; + } + + if (mutex_lock_interruptible(&shca->modify_mutex)) + return -ERESTARTSYS; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto modify_port1; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, 
port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto modify_port2; + } + + cap = (rblock->capability_mask | props->set_port_cap_mask) + & ~props->clr_port_cap_mask; + + hret = hipz_h_modify_port(shca->ipz_hca_handle, port, + cap, props->init_type, port_modify_mask); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); + ret = -EINVAL; + } + +modify_port2: + ehca_free_fw_ctrlblock(rblock); + +modify_port1: + mutex_unlock(&shca->modify_mutex); + + return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 20f36bf8b2b..f284be1c916 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -66,7 +66,9 @@ static void queue_comp_task(struct ehca_cq *__cq); static struct ehca_comp_pool* pool; +#ifdef CONFIG_HOTPLUG_CPU static struct notifier_block comp_pool_callback_nb; +#endif static inline void comp_event_callback(struct ehca_cq *cq) { @@ -733,6 +735,7 @@ static void take_over_work(struct ehca_comp_pool *pool, } +#ifdef CONFIG_HOTPLUG_CPU static int comp_pool_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -775,6 +778,7 @@ static int comp_pool_callback(struct notifier_block *nfb, return NOTIFY_OK; } +#endif int ehca_create_comp_pool(void) { @@ -805,9 +809,11 @@ int ehca_create_comp_pool(void) } } +#ifdef CONFIG_HOTPLUG_CPU comp_pool_callback_nb.notifier_call = comp_pool_callback; comp_pool_callback_nb.priority =0; register_cpu_notifier(&comp_pool_callback_nb); +#endif printk(KERN_INFO "eHCA scaling code enabled\n"); @@ -821,7 +827,9 @@ void ehca_destroy_comp_pool(void) if (!ehca_scaling_code) return; +#ifdef CONFIG_HOTPLUG_CPU unregister_cpu_notifier(&comp_pool_callback_nb); +#endif for (i = 0; i < NR_CPUS; i++) { if (cpu_online(i)) diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 95fd59fb452..e14b029332c 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -123,7 +123,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, +struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata); @@ -135,7 +135,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify); +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); struct ib_qp *ehca_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 059da9628bb..2d370543e96 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -313,6 +313,7 @@ int ehca_init_device(struct ehca_shca *shca) shca->ib_device.node_type = RDMA_NODE_IB_CA; shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.num_comp_vectors = 1; shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev; shca->ib_device.query_device = ehca_query_device; shca->ib_device.query_port = ehca_query_port; @@ -375,7 +376,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, 
NULL, (void*)(-1), 10); + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); @@ -565,11 +566,11 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, const struct of_device_id *id) { struct ehca_shca *shca; - u64 *handle; + const u64 *handle; struct ib_pd *ibpd; int ret; - handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL); + handle = get_property(dev->ofdev.node, "ibm,hca-handle", NULL); if (!handle) { ehca_gen_err("Cannot get eHCA handle for adapter: %s.", dev->ofdev.node->full_name); @@ -587,6 +588,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, ehca_gen_err("Cannot allocate shca memory."); return -ENOMEM; } + mutex_init(&shca->modify_mutex); shca->ibmebus_dev = dev; shca->ipz_hca_handle.handle = *handle; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 08d3f892d9f..caec9dee09e 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -634,11 +634,13 @@ poll_cq_exit0: return ret; } -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + unsigned long spl_flags; + int ret = 0; - switch (cq_notify) { + switch (notify_flags & IB_CQ_SOLICITED_MASK) { case IB_CQ_SOLICITED: hipz_set_cqx_n0(my_cq, 1); break; @@ -649,5 +651,11 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) return -EINVAL; } - return 0; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + ret = ipz_qeit_is_valid(&my_cq->ipz_queue); + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + } + + return ret; } diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 3fb46e67df8..b564fcd3b28 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -70,6 +70,10 @@ #define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) #define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) +#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) +#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) +#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) + /* direct access qp controls */ #define DAQP_CTRL_ENABLE 0x01 #define DAQP_CTRL_SEND_COMP 0x20 @@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, return ret; } +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask) +{ + u64 port_attributes = port_cap; + + if (modify_mask & IB_PORT_SHUTDOWN) + port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); + if (modify_mask & IB_PORT_INIT_TYPE) + port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); + if (modify_mask & IB_PORT_RESET_QKEY_CNTR) + port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); + + return ehca_plpar_hcall_norets(H_MODIFY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + port_attributes, /* r6 */ + 0, 0, 0, 0); +} + u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock) { diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 587ebd47095..2869f7dd619 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ 
-85,6 +85,10 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, struct hipz_query_port *query_port_response_block); +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask); + u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 8199c45768a..57f141a36bc 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -140,6 +140,14 @@ static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) return cqe; } +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + u32 cqe_flags = cqe->cqe_flags; + + return cqe_flags >> 7 == (queue->toggle_state & 1); +} + /* * returns and resets Queue Entry iterator * returns address (kv) of first Queue Entry diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 54139d39818..10c008f22ba 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -78,6 +78,8 @@ #define IPATH_IB_LINKINIT 3 #define IPATH_IB_LINKDOWN_SLEEP 4 #define IPATH_IB_LINKDOWN_DISABLE 5 +#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ +#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ /* * stats maintained by the driver. For now, at least, this is global @@ -316,11 +318,17 @@ struct ipath_base_info { /* address of readonly memory copy of the rcvhdrq tail register. */ __u64 spi_rcvhdr_tailaddr; - /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ + /* shared memory pages for subports if port is shared */ __u64 spi_subport_uregbase; __u64 spi_subport_rcvegrbuf; __u64 spi_subport_rcvhdr_base; + /* shared memory page for hardware port if it is shared */ + __u64 spi_port_uregbase; + __u64 spi_port_rcvegrbuf; + __u64 spi_port_rcvhdr_base; + __u64 spi_port_rcvhdr_tailaddr; + } __attribute__ ((aligned(8))); @@ -344,7 +352,7 @@ struct ipath_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. 
*/ -#define IPATH_USER_SWMINOR 3 +#define IPATH_USER_SWMINOR 5 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) @@ -418,11 +426,14 @@ struct ipath_user_info { #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ -#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ +#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */ #define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ #define IPATH_CMD_USER_INIT 24 /* set up userspace */ +#define IPATH_CMD_UNUSED_1 25 +#define IPATH_CMD_UNUSED_2 26 +#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ -#define IPATH_CMD_MAX 24 +#define IPATH_CMD_MAX 27 struct ipath_port_info { __u32 num_active; /* number of active units */ @@ -430,7 +441,7 @@ struct ipath_port_info { __u16 port; /* port on unit assigned to caller */ __u16 subport; /* subport on unit assigned to caller */ __u16 num_ports; /* number of ports available on unit */ - __u16 num_subports; /* number of subport slaves opened on port */ + __u16 num_subports; /* number of subports opened on port */ }; struct ipath_tid_info { diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 87462e0cb4d..3e9241badba 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) } return; } - wc->queue[head] = *entry; + wc->queue[head].wr_id = entry->wr_id; + wc->queue[head].status = entry->status; + wc->queue[head].opcode = entry->opcode; + wc->queue[head].vendor_err = entry->vendor_err; + wc->queue[head].byte_len = entry->byte_len; + wc->queue[head].imm_data = (__u32 __force)entry->imm_data; + wc->queue[head].qp_num = entry->qp->qp_num; + wc->queue[head].src_qp = entry->src_qp; + wc->queue[head].wc_flags = entry->wc_flags; + wc->queue[head].pkey_index = entry->pkey_index; + wc->queue[head].slid = entry->slid; + wc->queue[head].sl = entry->sl; + wc->queue[head].dlid_path_bits = entry->dlid_path_bits; + wc->queue[head].port_num = entry->port_num; wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || @@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) if (tail > (u32) cq->ibcq.cqe) tail = (u32) cq->ibcq.cqe; for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + struct ipath_qp *qp; + if (tail == wc->head) break; - *entry = wc->queue[tail]; + + qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table, + wc->queue[tail].qp_num); + entry->qp = &qp->ibqp; + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + entry->wr_id = wc->queue[tail].wr_id; + entry->status = wc->queue[tail].status; + entry->opcode = wc->queue[tail].opcode; + entry->vendor_err = wc->queue[tail].vendor_err; + entry->byte_len = wc->queue[tail].byte_len; + entry->imm_data = wc->queue[tail].imm_data; + entry->src_qp = wc->queue[tail].src_qp; + entry->wc_flags = wc->queue[tail].wc_flags; + entry->pkey_index = wc->queue[tail].pkey_index; + entry->slid = wc->queue[tail].slid; + entry->sl = wc->queue[tail].sl; + entry->dlid_path_bits = wc->queue[tail].dlid_path_bits; + entry->port_num = wc->queue[tail].port_num; if (tail >= cq->ibcq.cqe) tail = 0; else @@ -170,7 +204,7 @@ static void send_complete(unsigned long data) * * Called by ib_create_cq() in the generic verbs code. 
*/ -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { @@ -209,33 +243,21 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - struct ipath_mmap_info *ip; - __u64 offset = (__u64) wc; int err; + u32 s = sizeof *wc + sizeof(struct ib_wc) * entries; - err = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (err) { - ret = ERR_PTR(err); + cq->ip = ipath_create_mmap_info(dev, s, context, wc); + if (!cq->ip) { + ret = ERR_PTR(-ENOMEM); goto bail_wc; } - /* Allocate info for ipath_mmap(). */ - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wc; + err = ib_copy_to_udata(udata, &cq->ip->offset, + sizeof(cq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; } - cq->ip = ip; - ip->context = context; - ip->obj = wc; - kref_init(&ip->ref); - ip->mmap_cnt = 0; - ip->size = PAGE_ALIGN(sizeof(*wc) + - sizeof(struct ib_wc) * entries); - spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; - spin_unlock_irq(&dev->pending_lock); } else cq->ip = NULL; @@ -243,12 +265,18 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, if (dev->n_cqs_allocated == ib_ipath_max_cqs) { spin_unlock(&dev->n_cqs_lock); ret = ERR_PTR(-ENOMEM); - goto bail_wc; + goto bail_ip; } dev->n_cqs_allocated++; spin_unlock(&dev->n_cqs_lock); + if (cq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + /* * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. * The number of entries should be >= the number requested or return @@ -267,12 +295,12 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, goto done; +bail_ip: + kfree(cq->ip); bail_wc: vfree(wc); - bail_cq: kfree(cq); - done: return ret; } @@ -306,17 +334,18 @@ int ipath_destroy_cq(struct ib_cq *ibcq) /** * ipath_req_notify_cq - change the notification type for a completion queue * @ibcq: the completion queue - * @notify: the type of notification to request + * @notify_flags: the type of notification to request * * Returns 0 for success. * * This may be called from interrupt context. Also called by * ib_req_notify_cq() in the generic verbs code. */ -int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { struct ipath_cq *cq = to_icq(ibcq); unsigned long flags; + int ret = 0; spin_lock_irqsave(&cq->lock, flags); /* @@ -324,9 +353,15 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). 
*/ if (cq->notify != IB_CQ_NEXT_COMP) - cq->notify = notify; + cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->queue->head != cq->queue->tail) + ret = 1; + spin_unlock_irqrestore(&cq->lock, flags); - return 0; + + return ret; } /** @@ -409,13 +444,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) if (cq->ip) { struct ipath_ibdev *dev = to_idev(ibcq->device); struct ipath_mmap_info *ip = cq->ip; + u32 s = sizeof *wc + sizeof(struct ib_wc) * cqe; - ip->obj = wc; - ip->size = PAGE_ALIGN(sizeof(*wc) + - sizeof(struct ib_wc) * cqe); + ipath_update_mmap_info(dev, ip, s, wc); spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index df69f0d80b8..42bfbdb0d3e 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -57,6 +57,7 @@ #define __IPATH_PROCDBG 0x100 /* print mmap/nopage stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x200 +#define __IPATH_ERRPKTDBG 0x400 #define __IPATH_USER_SEND 0x1000 /* use user mode send */ #define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ #define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 0f13a2182cc..63e8368b0e9 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp) } fp->private_data = dd; - ipath_diag_inuse = 1; + ipath_diag_inuse = -2; diag_set_link = 0; ret = 0; @@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data, else if ((count % 4) || (*off % 4)) /* address or length is not 32-bit aligned, hence invalid */ ret = -EINVAL; + else if (ipath_diag_inuse < 1 && (*off || count != 8)) + ret = -EINVAL; /* prevent cat /dev/ipath_diag* */ else if ((count % 8) || (*off % 8)) /* address or length not 64-bit aligned; do 32-bit reads */ ret = ipath_read_umem32(dd, data, kreg_base + *off, count); @@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data, if (ret >= 0) { *off += count; ret = count; + if (ipath_diag_inuse == -2) + ipath_diag_inuse++; } return ret; @@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data, else if ((count % 4) || (*off % 4)) /* address or length is not 32-bit aligned, hence invalid */ ret = -EINVAL; + else if ((ipath_diag_inuse == -1 && (*off || count != 8)) || + ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */ + ret = -EINVAL; /* before any other write allowed */ else if ((count % 8) || (*off % 8)) /* address or length not 64-bit aligned; do 32-bit writes */ ret = ipath_write_umem32(dd, kreg_base + *off, data, count); @@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data, if (ret >= 0) { *off += count; ret = count; + if (ipath_diag_inuse == -1) + ipath_diag_inuse = 1; /* all read/write OK now */ } return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c index f6f94904082..f87f003e3ef 100644 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ 
-167,7 +167,7 @@ static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size, } static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) + void *cpu_addr, u64 dma_handle) { free_pages((unsigned long) cpu_addr, get_order(size)); } diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index ae7f21a0cdc..e3a22320971 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. */ switch (ent->device) { -#ifdef CONFIG_HT_IRQ case PCI_DEVICE_ID_INFINIPATH_HT: +#ifdef CONFIG_HT_IRQ ipath_init_iba6110_funcs(dd); break; +#else + ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if " + "CONFIG_HT_IRQ is not enabled\n", ent->device); + return -ENODEV; #endif -#ifdef CONFIG_PCI_MSI case PCI_DEVICE_ID_INFINIPATH_PE800: +#ifdef CONFIG_PCI_MSI ipath_init_iba6120_funcs(dd); break; +#else + ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if " + "CONFIG_PCI_MSI is not enabled\n", ent->device); + return -ENODEV; #endif default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " @@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ if (ret) - goto bail_iounmap; + goto bail_irqsetup; ret = ipath_enable_wc(dd); @@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail; +bail_irqsetup: + if (pdev->irq) free_irq(pdev->irq, dd); + bail_iounmap: iounmap((volatile void __iomem *) dd->ipath_kregbase); @@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) { int port; - ipath_shutdown_device(dd); - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { /* can't do anything more with chip; needs re-init */ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; @@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", dd->ipath_pageshadow); - vfree(dd->ipath_pageshadow); + tmpp = dd->ipath_pageshadow; dd->ipath_pageshadow = NULL; + vfree(tmpp); } /* @@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); + /* + * disable the IB link early, to be sure no new packets arrive, which + * complicates the shutdown process + */ + ipath_shutdown_device(dd); + if (dd->verbs_dev) ipath_unregister_ib_device(dd->verbs_dev); @@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; } -void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) +/* + * Decode the error status into strings, deciding whether to always + * print * it or not depending on "normal packet errors" vs everything + * else. Return 1 if "real" errors, otherwise 0 if only packet + * errors, so caller can decide what to print with the string. + */ +int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) { + int iserr = 1; *buf = '\0'; + if (err & INFINIPATH_E_PKTERRS) { + if (!(err & ~INFINIPATH_E_PKTERRS)) + iserr = 0; // if only packet errors. 
+ if (ipath_debug & __IPATH_ERRPKTDBG) { + if (err & INFINIPATH_E_REBP) + strlcat(buf, "EBP ", blen); + if (err & INFINIPATH_E_RVCRC) + strlcat(buf, "VCRC ", blen); + if (err & INFINIPATH_E_RICRC) { + strlcat(buf, "CRC ", blen); + // clear for check below, so only once + err &= INFINIPATH_E_RICRC; + } + if (err & INFINIPATH_E_RSHORTPKTLEN) + strlcat(buf, "rshortpktlen ", blen); + if (err & INFINIPATH_E_SDROPPEDDATAPKT) + strlcat(buf, "sdroppeddatapkt ", blen); + if (err & INFINIPATH_E_SPKTLEN) + strlcat(buf, "spktlen ", blen); + } + if ((err & INFINIPATH_E_RICRC) && + !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP))) + strlcat(buf, "CRC ", blen); + if (!iserr) + goto done; + } if (err & INFINIPATH_E_RHDRLEN) strlcat(buf, "rhdrlen ", blen); if (err & INFINIPATH_E_RBADTID) @@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "rhdr ", blen); if (err & INFINIPATH_E_RLONGPKTLEN) strlcat(buf, "rlongpktlen ", blen); - if (err & INFINIPATH_E_RSHORTPKTLEN) - strlcat(buf, "rshortpktlen ", blen); if (err & INFINIPATH_E_RMAXPKTLEN) strlcat(buf, "rmaxpktlen ", blen); if (err & INFINIPATH_E_RMINPKTLEN) strlcat(buf, "rminpktlen ", blen); + if (err & INFINIPATH_E_SMINPKTLEN) + strlcat(buf, "sminpktlen ", blen); if (err & INFINIPATH_E_RFORMATERR) strlcat(buf, "rformaterr ", blen); if (err & INFINIPATH_E_RUNSUPVL) @@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "runexpchar ", blen); if (err & INFINIPATH_E_RIBFLOW) strlcat(buf, "ribflow ", blen); - if (err & INFINIPATH_E_REBP) - strlcat(buf, "EBP ", blen); if (err & INFINIPATH_E_SUNDERRUN) strlcat(buf, "sunderrun ", blen); if (err & INFINIPATH_E_SPIOARMLAUNCH) strlcat(buf, "spioarmlaunch ", blen); if (err & INFINIPATH_E_SUNEXPERRPKTNUM) strlcat(buf, "sunexperrpktnum ", blen); - if (err & INFINIPATH_E_SDROPPEDDATAPKT) - strlcat(buf, "sdroppeddatapkt ", blen); if (err & INFINIPATH_E_SDROPPEDSMPPKT) strlcat(buf, "sdroppedsmppkt ", blen); if (err & INFINIPATH_E_SMAXPKTLEN) strlcat(buf, "smaxpktlen ", blen); - if (err & INFINIPATH_E_SMINPKTLEN) - strlcat(buf, "sminpktlen ", blen); if (err & INFINIPATH_E_SUNSUPVL) strlcat(buf, "sunsupVL ", blen); - if (err & INFINIPATH_E_SPKTLEN) - strlcat(buf, "spktlen ", blen); if (err & INFINIPATH_E_INVALIDADDR) strlcat(buf, "invalidaddr ", blen); - if (err & INFINIPATH_E_RICRC) - strlcat(buf, "CRC ", blen); - if (err & INFINIPATH_E_RVCRC) - strlcat(buf, "VCRC ", blen); if (err & INFINIPATH_E_RRCVEGRFULL) strlcat(buf, "rcvegrfull ", blen); if (err & INFINIPATH_E_RRCVHDRFULL) @@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "hardware ", blen); if (err & INFINIPATH_E_RESET) strlcat(buf, "reset ", blen); +done: + return iserr; } /** @@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) lstate = IPATH_LINKACTIVE; break; + case IPATH_IB_LINK_LOOPBACK: + dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + ret = 0; + goto bail; // no state change to wait for + + case IPATH_IB_LINK_EXTERNAL: + dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n"); + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + ret = 0; + goto bail; // no state change to wait for + default: ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); ret = -EINVAL; @@ -1765,29 
+1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) return 0; } -/** - * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register - * @dd: the infinipath device - * @regno: the register number to read - * @port: the port containing the register - * - * Registers that vary with the chip implementation constants (port) - * use this routine. - */ -u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno, - unsigned port) -{ - u16 where; - - if (port < dd->ipath_portcnt && - (regno == dd->ipath_kregs->kr_rcvhdraddr || - regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) - where = regno + port; - else - where = -1; - - return ipath_read_kreg64(dd, where); -} /** * ipath_write_kreg_port - write a device's per-port 64-bit kernel register @@ -1973,7 +2005,8 @@ static int __init infinipath_init(void) { int ret; - ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); + if (ipath_debug & __IPATH_DBG) + printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); /* * These must be called before the driver is registered with diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index a4019a6b756..030185f90ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) } else memcpy(dd->ipath_serial, ifp->if_serial, sizeof ifp->if_serial); + if (!strstr(ifp->if_comment, "Tested successfully")) + ipath_dev_err(dd, "Board SN %s did not pass functional " + "test: %s\n", dd->ipath_serial, + ifp->if_comment); ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", (unsigned long long) be64_to_cpu(dd->ipath_guid)); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 5d64ff87529..1272aaf2a78 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,12 +41,6 @@ #include "ipath_kernel.h" #include "ipath_common.h" -/* - * mmap64 doesn't allow all 64 bits for 32-bit applications - * so only use the low 43 bits. - */ -#define MMAP64_MASK 0x7FFFFFFFFFFUL - static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, @@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = { .mmap = ipath_mmap }; +/* + * Convert kernel virtual addresses to physical addresses so they don't + * potentially conflict with the chip addresses used as mmap offsets. + * It doesn't really matter what mmap offset we use as long as we can + * interpret it correctly. 
+ */ +static u64 cvt_kvaddr(void *p) +{ + struct page *page; + u64 paddr = 0; + + page = vmalloc_to_page(p); + if (page) + paddr = page_to_pfn(page) << PAGE_SHIFT; + + return paddr; +} + static int ipath_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { @@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp, sz = sizeof(*kinfo); /* If port sharing is not requested, allow the old size structure */ if (!shared) - sz -= 3 * sizeof(u64); + sz -= 7 * sizeof(u64); if (ubase_size < sz) { ipath_cdbg(PROC, "Base size %zu, need %zu (version mismatch?)\n", @@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp, kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * (dd->ipath_pbufsport - kinfo->spi_piocnt); - kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_palign * pd->port_port; } else { unsigned slave = subport_fp(fp) - 1; kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * kinfo->spi_piocnt * slave; - kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + - PAGE_SIZE * slave) & MMAP64_MASK; + } + if (shared) { + kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; + kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; + kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; - kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * slave) & MMAP64_MASK; - kinfo->spi_rcvhdr_tailaddr = - (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; - kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + - dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & - MMAP64_MASK; + kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport_fp(fp)); + + kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport_fp(fp)); + kinfo->spi_rcvhdr_tailaddr = 0; + kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf + + pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * + subport_fp(fp)); + + kinfo->spi_subport_uregbase = + cvt_kvaddr(pd->subport_uregbase); + kinfo->spi_subport_rcvegrbuf = + cvt_kvaddr(pd->subport_rcvegrbuf); + kinfo->spi_subport_rcvhdr_base = + cvt_kvaddr(pd->subport_rcvhdr_base); + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, kinfo->spi_runtime_flags, + (unsigned long long) kinfo->spi_subport_uregbase, + (unsigned long long) kinfo->spi_subport_rcvegrbuf, + (unsigned long long) kinfo->spi_subport_rcvhdr_base); } kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / @@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp, if (master) { kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; - kinfo->spi_subport_uregbase = - (u64) pd->subport_uregbase & MMAP64_MASK; - kinfo->spi_subport_rcvegrbuf = - (u64) pd->subport_rcvegrbuf & MMAP64_MASK; - kinfo->spi_subport_rcvhdr_base = - (u64) pd->subport_rcvhdr_base & MMAP64_MASK; - ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", - kinfo->spi_port, kinfo->spi_runtime_flags, - (unsigned long long) kinfo->spi_subport_uregbase, - (unsigned long long) kinfo->spi_subport_rcvegrbuf, - (unsigned long long) kinfo->spi_subport_rcvhdr_base); } - if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) + sz = (ubase_size < sizeof(*kinfo)) ? 
ubase_size : sizeof(*kinfo); + if (copy_to_user(ubase, kinfo, sz)) ret = -EFAULT; bail: @@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, struct ipath_devdata *dd; void *addr; size_t size; - int ret; + int ret = 0; /* If the port is not shared, all addresses should be physical */ - if (!pd->port_subport_cnt) { - ret = -EINVAL; + if (!pd->port_subport_cnt) goto bail; - } dd = pd->port_dd; size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; /* - * Master has all the slave uregbase, rcvhdrq, and - * rcvegrbufs mmapped. + * Each process has all the subport uregbase, rcvhdrq, and + * rcvegrbufs mmapped - as an array for all the processes, + * and also separately for this process. */ - if (subport == 0) { - unsigned num_slaves = pd->port_subport_cnt - 1; - - if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { - addr = pd->subport_uregbase; - size = PAGE_SIZE * num_slaves; - } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & - MMAP64_MASK)) { - addr = pd->subport_rcvhdr_base; - size = pd->port_rcvhdrq_size * num_slaves; - } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & - MMAP64_MASK)) { - addr = pd->subport_rcvegrbuf; - size *= num_slaves; - } else { - ret = -EINVAL; - goto bail; - } - } else if (pgaddr == (((u64) pd->subport_uregbase + - PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { - addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); - size = PAGE_SIZE; - } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * (subport - 1)) & - MMAP64_MASK)) { - addr = pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * (subport - 1); - size = pd->port_rcvhdrq_size; - } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + - size * (subport - 1)) & MMAP64_MASK)) { - addr = pd->subport_rcvegrbuf + size * (subport - 1); - /* rcvegrbufs are read-only on the slave */ - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, - "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); - ret = -EPERM; - goto bail; - } - /* - * Don't allow permission to later change to writeable - * with mprotect. - */ - vma->vm_flags &= ~VM_MAYWRITE; + if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) { + addr = pd->subport_rcvegrbuf; + size *= pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport)) { + addr = pd->subport_uregbase + PAGE_SIZE * subport; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport; + size = pd->port_rcvhdrq_size; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf + + size * subport)) { + addr = pd->subport_rcvegrbuf + size * subport; + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. 
+ */ + vma->vm_flags &= ~VM_MAYWRITE; } else { - ret = -EINVAL; goto bail; } len = vma->vm_end - vma->vm_start; @@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &ipath_file_vm_ops; vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; - ret = 0; + ret = 1; bail: return ret; @@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) * Check for kernel virtual addresses first, anything else must * match a HW or memory address. */ - if (pgaddr >= (1ULL<<40)) { - ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + if (ret) { + if (ret > 0) + ret = 0; goto bail; } + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; if (!pd->port_subport_cnt) { /* port is not shared */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; piocnt = dd->ipath_pbufsport; piobufs = pd->port_piobufs; } else if (!subport_fp(fp)) { /* caller is the master */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + (dd->ipath_pbufsport % pd->port_subport_cnt); piobufs = pd->port_piobufs + @@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) unsigned slave = subport_fp(fp) - 1; /* caller is a slave */ - ureg = 0; piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } @@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, (void *) dd->ipath_pioavailregs_dma, "pioavail registers"); - else if (subport_fp(fp)) - /* Subports don't mmap the physical receive buffers */ - ret = -EINVAL; else if (pgaddr == pd->port_rcvegr_phys) ret = mmap_rcvegrbufs(vma, pd); else if (pgaddr == (u64) pd->port_rcvhdrq_phys) @@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd, const struct ipath_user_info *uinfo) { int ret = 0; - unsigned num_slaves; + unsigned num_subports; size_t size; - /* Old user binaries don't know about subports */ - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) - goto bail; /* * If the user is requesting zero or one port, * skip the subport allocation. */ if (uinfo->spu_subport_cnt <= 1) goto bail; - if (uinfo->spu_subport_cnt > 4) { + + /* Old user binaries don't know about new subport implementation */ + if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) { + dev_info(&dd->pcidev->dev, + "Mismatched user minor version (%d) and driver " + "minor version (%d) while port sharing. Ensure " + "that driver and library are from the same " + "release.\n", + (int) (uinfo->spu_userversion & 0xffff), + IPATH_USER_SWMINOR); + goto bail; + } + if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) { ret = -EINVAL; goto bail; } - num_slaves = uinfo->spu_subport_cnt - 1; - pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); + num_subports = uinfo->spu_subport_cnt; + pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports); if (!pd->subport_uregbase) { ret = -ENOMEM; goto bail; } /* Note: pd->port_rcvhdrq_size isn't initialized yet. 
*/ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * - sizeof(u32), PAGE_SIZE) * num_slaves; + sizeof(u32), PAGE_SIZE) * num_subports; pd->subport_rcvhdr_base = vmalloc(size); if (!pd->subport_rcvhdr_base) { ret = -ENOMEM; @@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd, pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * - num_slaves); + num_subports); if (!pd->subport_rcvegrbuf) { ret = -ENOMEM; goto bail_rhdr; @@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd, pd->port_subport_cnt = uinfo->spu_subport_cnt; pd->port_subport_id = uinfo->spu_subport_id; pd->active_slaves = 1; + set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports); + memset(pd->subport_rcvhdr_base, 0, size); + memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_subports); goto bail; bail_rhdr: @@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp, */ if (!cpus_empty(current->cpus_allowed) && !cpus_full(current->cpus_allowed)) { - int ncpus = num_online_cpus(), curcpu = -1; + int ncpus = num_online_cpus(), curcpu = -1, nset = 0; for (i = 0; i < ncpus; i++) if (cpu_isset(i, current->cpus_allowed)) { ipath_cdbg(PROC, "%s[%u] affinity set for " - "cpu %d\n", current->comm, - current->pid, i); + "cpu %d/%d\n", current->comm, + current->pid, i, ncpus); curcpu = i; + nset++; } - if (curcpu != -1) { + if (curcpu != -1 && nset != ncpus) { if (npresent) { prefunit = curcpu / (ncpus / npresent); - ipath_dbg("%s[%u] %d chips, %d cpus, " + ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, " "%d cpus/chip, select unit %d\n", current->comm, current->pid, npresent, ncpus, ncpus / npresent, @@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp, const struct ipath_user_info *uinfo) { int ret; - struct ipath_portdata *pd; + struct ipath_portdata *pd = port_fp(fp); struct ipath_devdata *dd; u32 head32; - pd = port_fp(fp); + /* Subports don't need to initialize anything since master did it. 
*/ + if (subport_fp(fp)) { + ret = wait_event_interruptible(pd->port_wait, + !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag)); + goto done; + } + dd = pd->port_dd; if (uinfo->spu_rcvhdrsize) { @@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp, dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + /* Notify any waiting slaves */ + if (pd->port_subport_cnt) { + clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + wake_up(&pd->port_wait); + } done: return ret; } @@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd, return ret; } +static int ipath_force_pio_avail_update(struct ipath_devdata *dd) +{ + u64 reg = dd->ipath_sendctrl; + + clear_bit(IPATH_S_PIOBUFAVAILUPD, ®); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + + return 0; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.part_key; src = &ucmd->cmd.part_key; break; - case IPATH_CMD_SLAVE_INFO: + case __IPATH_CMD_SLAVE_INFO: copy = sizeof(cmd.cmd.slave_mask_addr); dest = &cmd.cmd.slave_mask_addr; src = &ucmd->cmd.slave_mask_addr; break; + case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg + copy = 0; + src = NULL; + dest = NULL; + break; default: ret = -EINVAL; goto bail; } - if ((count - consumed) < copy) { - ret = -EINVAL; - goto bail; - } + if (copy) { + if ((count - consumed) < copy) { + ret = -EINVAL; + goto bail; + } - if (copy_from_user(dest, src, copy)) { - ret = -EFAULT; - goto bail; + if (copy_from_user(dest, src, copy)) { + ret = -EFAULT; + goto bail; + } + + consumed += copy; } - consumed += copy; pd = port_fp(fp); if (!pd && cmd.type != __IPATH_CMD_USER_INIT && cmd.type != IPATH_CMD_ASSIGN_PORT) { @@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, case IPATH_CMD_SET_PART_KEY: ret = ipath_set_part_key(pd, cmd.cmd.part_key); break; - case IPATH_CMD_SLAVE_INFO: + case __IPATH_CMD_SLAVE_INFO: ret = ipath_get_slave_info(pd, (void __user *) (unsigned long) cmd.cmd.slave_mask_addr); break; + case IPATH_CMD_PIOAVAILUPD: + ret = ipath_force_pio_avail_update(pd->port_dd); + break; } if (ret >= 0) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 5b40a846ff9..036ed1ef179 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -38,7 +38,6 @@ #include <linux/pagemap.h> #include <linux/init.h> #include <linux/namei.h> -#include <linux/pci.h> #include "ipath_kernel.h" @@ -451,12 +450,18 @@ bail: return ret; } -static void remove_file(struct dentry *parent, char *name) +static int remove_file(struct dentry *parent, char *name) { struct dentry *tmp; + int ret; tmp = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto bail; + } + spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { @@ -469,6 +474,14 @@ static void remove_file(struct dentry *parent, char *name) spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); } + + ret = 0; +bail: + /* + * We don't expect clients to care about the return value, but + * it's there if they need it. 
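/*
 * Illustrative sketch, not part of the patch: the user-init handshake
 * above (slaves sleep on port_wait until the master clears
 * IPATH_PORT_MASTER_UNINIT and calls wake_up) is the usual "wait for a
 * flag under a lock" pattern. A userspace analogue using pthreads, for
 * reference only; the names below are invented for the example:
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  init_cond = PTHREAD_COND_INITIALIZER;
static bool master_uninit = true;	/* ~ IPATH_PORT_MASTER_UNINIT */

static void slave_wait_for_master(void)
{
	pthread_mutex_lock(&init_lock);
	while (master_uninit)		/* re-check after every wakeup */
		pthread_cond_wait(&init_cond, &init_lock);
	pthread_mutex_unlock(&init_lock);
}

static void master_finish_init(void)
{
	pthread_mutex_lock(&init_lock);
	master_uninit = false;			/* ~ clear_bit() */
	pthread_cond_broadcast(&init_cond);	/* ~ wake_up(&pd->port_wait) */
	pthread_mutex_unlock(&init_lock);
}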
+ */ + return ret; } static int remove_device_files(struct super_block *sb, diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 99348254502..4171198fc20 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -43,6 +43,9 @@ #include "ipath_kernel.h" #include "ipath_registers.h" +static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64); + + /* * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed. @@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = { .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), /* - * These should not be used directly via ipath_read_kreg64(), - * use them with ipath_read_kreg64_port(), + * These should not be used directly via ipath_write_kreg64(), + * use them with ipath_write_kreg64_port(), */ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) @@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = { #define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 #define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 + +/* TID entries (memory), HT-only */ +#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ +#define INFINIPATH_RT_VALID 0x8000000000000000ULL +#define INFINIPATH_RT_ADDR_SHIFT 0 +#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL +#define INFINIPATH_RT_BUFSIZE_SHIFT 48 + /* * masks and bits that are different in different chips, or present only * in one @@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; +#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) +#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ + << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) + +static int ipath_ht_txe_recover(struct ipath_devdata *); + /** * ipath_ht_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, /* * make sure we get this much out, unless told to be quiet, + * it's a parity error we may recover from, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~(dd->ipath_lasthwerror | - ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || - (ipath_debug & __IPATH_VERBDBG)) + if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | + RXE_EAGER_PARITY)) || + (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; @@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - if (ctrl & INFINIPATH_C_FREEZEMODE) { + if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { /* * parity errors in send memory are recoverable, * just cancel the send (if indicated in * sendbuffererror), @@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. 
*/ - if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - ipath_stats.sps_txeparity++; - ipath_dbg("Recovering from TXE parity error (%llu), " - "hwerrstatus=%llx\n", - (unsigned long long) ipath_stats.sps_txeparity, - (unsigned long long) hwerrs); - ipath_disarm_senderrbufs(dd); - hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - return; - } - } - if (hwerrs) { - /* - * if any set that we aren't ignoring; only - * make the complaint once, in case it's stuck - * or recurring, and we get here multiple - * times. - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - /* - * mark as not usable, at a minimum until driver - * is reloaded, probably until reboot, since no - * other reset is possible. - */ - dd->ipath_flags &= ~IPATH_INITTED; - } else { - ipath_dbg("Clearing freezemode on ignored hardware " - "error\n"); + if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd)) + hwerrs &= ~TXE_PIO_PARITY; + if (hwerrs & RXE_EAGER_PARITY) + ipath_dev_err(dd, "RXE parity, Eager TID error is not " + "recoverable\n"); + if (!hwerrs) { + ipath_dbg("Clearing freezemode on ignored or " + "recovered hardware error\n"); ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, ctrl); @@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - ipath_dev_err(dd, "%s hardware error\n", msg); + if (hwerrs) { + /* + * if any set that we aren't ignoring; only + * make the complaint once, in case it's stuck + * or recurring, and we get here multiple + * times. + * force link down, so switch knows, and + * LEDs are turned off + */ + if (dd->ipath_flags & IPATH_INITTED) { + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + ipath_setup_ht_setextled(dd, + INFINIPATH_IBCS_L_STATE_DOWN, + INFINIPATH_IBCS_LT_STATE_DISABLED); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); + isfatal = 1; + } + *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + /* mark as having had error */ + *dd->ipath_statusp |= IPATH_STATUS_HWERROR; + /* + * mark as not usable, at a minimum until driver + * is reloaded, probably until reboot, since no + * other reset is possible. 
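/*
 * Illustrative sketch, not part of the patch: the freeze-mode handling
 * above clears the error bits it knows how to recover from and only
 * leaves freeze mode when nothing is left over. The decision, reduced to
 * plain masks (recoverable_mask standing in for TXE_PIO_PARITY):
 */
#include <stdbool.h>
#include <stdint.h>

static bool handle_freeze_errors(uint64_t *hwerrs, uint64_t recoverable_mask,
				 bool (*recover)(void))
{
	if ((*hwerrs & recoverable_mask) && recover())
		*hwerrs &= ~recoverable_mask;

	return *hwerrs == 0;	/* true: safe to clear INFINIPATH_C_FREEZEMODE */
}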
+ */ + dd->ipath_flags &= ~IPATH_INITTED; + } + else + *msg = 0; /* recovered from all of them */ + if (*msg) + ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) /* * for status file; if no trailing brace is copied, @@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (n) snprintf(name, namelen, "%s", n); - if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { + if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || + dd->ipath_minrev > 3)) { /* * This version of the driver only supports Rev 3.2 and 3.3 */ @@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) ipath_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT) + ipath_dbg("MemBIST corrected\n"); ipath_check_htlink(dd); @@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, u32 type, unsigned long pa) { + if (!dd->ipath_kregbase) + return; + if (pa != dd->ipath_tidinvalid) { if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { dev_info(&dd->pcidev->dev, @@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, pa |= lenvalid | INFINIPATH_RT_VALID; } } - if (dd->ipath_kregbase) - writeq(pa, tidptr); + writeq(pa, tidptr); } + /** * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager * @dd: the infinipath device @@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) INFINIPATH_S_ABORT); ipath_get_eeprom_info(dd); - if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && + if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { /* * Later production QHT7040 has same changes as QHT7140, so @@ -1528,6 +1548,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) return 0; } + +static int ipath_ht_txe_recover(struct ipath_devdata *dd) +{ + int cnt = ++ipath_stats.sps_txeparity; + if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { + if (cnt == IPATH_MAX_PARITY_ATTEMPTS) + ipath_dev_err(dd, + "Too many attempts to recover from " + "TXE parity, giving up\n"); + return 0; + } + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + ipath_disarm_senderrbufs(dd, 1); + return 1; +} + + /** * ipath_init_ht_get_base_info - set chip-specific flags for user code * @dd: the infinipath device diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 05918e1e7c3..1b9c3085775 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -43,6 +43,8 @@ #include "ipath_kernel.h" #include "ipath_registers.h" +static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64); + /* * This file contains all the chip-specific register information and * access functions for the QLogic InfiniPath PCI-Express chip. 
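/*
 * Illustrative sketch, not part of the patch: ipath_ht_txe_recover()
 * above applies a "count attempts, complain once at the limit, then give
 * up" policy capped by IPATH_MAX_PARITY_ATTEMPTS (defined as 10000 later
 * in this series). The same policy as a self-contained helper:
 */
#include <stdio.h>

#define MAX_PARITY_ATTEMPTS 10000	/* mirrors IPATH_MAX_PARITY_ATTEMPTS */

static unsigned long txeparity_attempts;	/* ~ ipath_stats.sps_txeparity */

static int try_txe_recover(void)
{
	unsigned long cnt = ++txeparity_attempts;

	if (cnt >= MAX_PARITY_ATTEMPTS) {
		if (cnt == MAX_PARITY_ATTEMPTS)
			fprintf(stderr,
				"too many TXE parity recoveries, giving up\n");
		return 0;	/* leave the error for the fatal path */
	}
	return 1;		/* caller clears the TXE_PIO_PARITY bits */
}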
@@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = { .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), /* - * These should not be used directly via ipath_read_kreg64(), - * use them with ipath_read_kreg64_port() + * These should not be used directly via ipath_write_kreg64(), + * use them with ipath_write_kreg64_port(), */ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), @@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; +#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) + +static int ipath_pe_txe_recover(struct ipath_devdata *); + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. */ - if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - ipath_stats.sps_txeparity++; - ipath_dbg("Recovering from TXE parity error (%llu), " - "hwerrstatus=%llx\n", - (unsigned long long) ipath_stats.sps_txeparity, - (unsigned long long) hwerrs); - ipath_disarm_senderrbufs(dd); - hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - return; - } - } + if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd)) + hwerrs &= ~TXE_PIO_PARITY; if (hwerrs) { /* * if any set that we aren't ignoring only make the * complaint once, in case it's stuck or recurring, * and we get here multiple times + * Force link down, so switch knows, and + * LEDs are turned off */ if (dd->ipath_flags & IPATH_INITTED) { + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + ipath_setup_pe_setextled(dd, + INFINIPATH_IBCS_L_STATE_DOWN, + INFINIPATH_IBCS_LT_STATE_DISABLED); ipath_dev_err(dd, "Fatal Hardware Error (freeze " "mode), no longer usable, SN %.16s\n", dd->ipath_serial); @@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - ipath_dev_err(dd, "%s hardware error\n", msg); + if (*msg) + ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { /* * for /sys status file ; if no trailing } is copied, we'll @@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) ipath_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND) + ipath_dbg("MemBIST corrected\n"); val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ @@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd) dd->ipath_irq = 0; } +/* + * On platforms using this chip, and not having ordered WC stores, we + * can get TXE parity errors due to speculative reads to the PIO buffers, + * and this, due to a chip bug can result in (many) false parity error + * reports. So it's a debug print on those, and an info print on systems + * where the speculative reads don't occur. 
+ * Because we can get lots of false errors, we have no upper limit + * on recovery attempts on those platforms. + */ +static int ipath_pe_txe_recover(struct ipath_devdata *dd) +{ + if (ipath_unordered_wc()) + ipath_dbg("Recovering from TXE PIO parity error\n"); + else { + int cnt = ++ipath_stats.sps_txeparity; + if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { + if (cnt == IPATH_MAX_PARITY_ATTEMPTS) + ipath_dev_err(dd, + "Too many attempts to recover from " + "TXE parity, giving up\n"); + return 0; + } + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + } + ipath_disarm_senderrbufs(dd, 1); + return 1; +} + /** * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index d4f6b5239ef..7045ba68949 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd) return ret; } +static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd) +{ + struct ipath_portdata *pd = NULL; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (pd) { + pd->port_dd = dd; + pd->port_cnt = 1; + /* The port 0 pkey table is used by the layer interface. */ + pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; + } + return pd; +} + static int init_chip_first(struct ipath_devdata *dd, struct ipath_portdata **pdp) { @@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd, goto done; } - dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL); + pd = create_portdata0(dd); - if (!dd->ipath_pd[0]) { + if (!pd) { ipath_dev_err(dd, "Unable to allocate portdata for port " "0, failing\n"); ret = -ENOMEM; goto done; } - pd = dd->ipath_pd[0]; - pd->port_dd = dd; - pd->port_port = 0; - pd->port_cnt = 1; - /* The port 0 pkey table is used by the layer interface. */ - pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; + dd->ipath_pd[0] = pd; + dd->ipath_rcvtidcnt = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); dd->ipath_rcvtidbase = @@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd, goto done; } + + /* clear diagctrl register, in case diags were running and crashed */ + ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0); + /* clear the initial reset flag, in case first driver load */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, INFINIPATH_E_RESET); @@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) { int ret = 0, i; u32 val32, kpiobufs; + u32 piobufs, uports; u64 val; struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ gfp_t gfp_flags = GFP_USER | __GFP_COMP; @@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) * the in memory DMA'ed copies of the registers. This has to * be done early, before we calculate lastport, etc. */ - val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; /* * calc number of pioavail registers, and save it; we have 2 * bits per buffer. */ - dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) + dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); + uports = dd->ipath_cfgports ? 
dd->ipath_cfgports - 1 : 0; if (ipath_kpiobufs == 0) { /* not set by user (this is default) */ - if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) + if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32) kpiobufs = 32; else kpiobufs = 16; @@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) else kpiobufs = ipath_kpiobufs; - if (kpiobufs > - (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) { - i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT); + if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { + i = (int) piobufs - + (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); if (i < 0) i = 0; - dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for " - "kernel leaves too few for %d user ports " + dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " + "%d for kernel leaves too few for %d user ports " "(%d each); using %u\n", kpiobufs, - dd->ipath_cfgports - 1, - IPATH_MIN_USER_PORT_BUFCNT, i); + piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); /* * shouldn't change ipath_kpiobufs, because could be * different for different devices... */ kpiobufs = i; } - dd->ipath_lastport_piobuf = - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs; - dd->ipath_pbufsport = dd->ipath_cfgports > 1 - ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) - : 0; - val32 = dd->ipath_lastport_piobuf - - (dd->ipath_pbufsport * (dd->ipath_cfgports - 1)); + dd->ipath_lastport_piobuf = piobufs - kpiobufs; + dd->ipath_pbufsport = + uports ? dd->ipath_lastport_piobuf / uports : 0; + val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); if (val32 > 0) { ipath_dbg("allocating %u pbufs/port leaves %u unused, " "add to kernel\n", dd->ipath_pbufsport, val32); @@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " "each for %u user ports\n", kpiobufs, - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, - dd->ipath_pbufsport, dd->ipath_cfgports - 1); + piobufs, dd->ipath_pbufsport, uports); dd->ipath_f_early_init(dd); @@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing * re-init, the simplest way to handle this is to free * existing, and re-allocate. + * Need to re-create rest of port 0 portdata as well. */ if (reinit) { - struct ipath_portdata *pd = dd->ipath_pd[0]; - dd->ipath_pd[0] = NULL; - ipath_free_pddata(dd, pd); + /* Alloc and init new ipath_portdata for port0, + * Then free old pd. Could lead to fragmentation, but also + * makes later support for hot-swap easier. + */ + struct ipath_portdata *npd; + npd = create_portdata0(dd); + if (npd) { + ipath_free_pddata(dd, pd); + dd->ipath_pd[0] = pd = npd; + } else { + ipath_dev_err(dd, "Unable to allocate portdata for" + " port 0, failing\n"); + ret = -ENOMEM; + goto done; + } } dd->ipath_f_tidtemplate(dd); ret = ipath_create_rcvhdrq(dd, pd); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 72b9e279d19..45d033169c6 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -38,10 +38,39 @@ #include "ipath_common.h" /* + * clear (write) a pio buffer, to clear a parity error. This routine + * should only be called when in freeze mode, and the buffer should be + * canceled afterwards. 
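/*
 * Illustrative sketch, not part of the patch: the kpiobufs clamp above
 * shrinks the kernel's share of PIO buffers so that each user port still
 * gets at least its minimum allotment. MIN_USER_PORT_BUFCNT below is a
 * stand-in for IPATH_MIN_USER_PORT_BUFCNT; its value here is assumed for
 * illustration only.
 */
#define MIN_USER_PORT_BUFCNT 8		/* assumed value */

static unsigned clamp_kpiobufs(unsigned piobufs, unsigned uports,
			       unsigned kpiobufs)
{
	if (kpiobufs + uports * MIN_USER_PORT_BUFCNT > piobufs) {
		int i = (int) piobufs - (int) (uports * MIN_USER_PORT_BUFCNT);

		kpiobufs = i < 0 ? 0 : (unsigned) i;
	}
	return kpiobufs;
}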
+ */ +static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) +{ + u32 __iomem *pbuf; + u32 dwcnt; /* dword count to write */ + if (pnum < dd->ipath_piobcnt2k) { + pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum * + dd->ipath_palign); + dwcnt = dd->ipath_piosize2k >> 2; + } + else { + pbuf = (u32 __iomem *) (dd->ipath_pio4kbase + + (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign); + dwcnt = dd->ipath_piosize4k >> 2; + } + dev_info(&dd->pcidev->dev, + "Rewrite PIO buffer %u, to recover from parity error\n", + pnum); + *pbuf = dwcnt+1; /* no flush required, since already in freeze */ + while(--dwcnt) + *pbuf++ = 0; +} + +/* * Called when we might have an error that is specific to a particular * PIO buffer, and may need to cancel that buffer, so it can be re-used. + * If rewrite is true, and bits are set in the sendbufferror registers, + * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd) +void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) { u32 piobcnt; unsigned long sbuf[4]; @@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd) } for (i = 0; i < piobcnt; i++) - if (test_bit(i, sbuf)) + if (test_bit(i, sbuf)) { + if (rewrite) + ipath_clrpiobuf(dd, i); ipath_disarm_piobufs(dd, i, 1); + } dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ } } @@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { u64 ignore_this_time = 0; - ipath_disarm_senderrbufs(dd); + ipath_disarm_senderrbufs(dd, 0); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd, * happens so often we never want to count it. */ if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { - ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror & - ~INFINIPATH_E_IBSTATUSCHANGED); + int iserr; + iserr = ipath_decode_err(msg, sizeof msg, + dd->ipath_lasterror & + ~INFINIPATH_E_IBSTATUSCHANGED); if (dd->ipath_lasterror & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) + ~(INFINIPATH_E_RRCVEGRFULL | + INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) ipath_dev_err(dd, "Suppressed %u messages for " "fast-repeating errors (%s) (%llx)\n", supp_msgs, msg, @@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd, * them. So only complain about these at debug * level. 
*/ - ipath_dbg("Suppressed %u messages for %s\n", - supp_msgs, msg); + if (iserr) + ipath_dbg("Suppressed %u messages for %s\n", + supp_msgs, msg); + else + ipath_cdbg(ERRPKT, + "Suppressed %u messages for %s\n", + supp_msgs, msg); } } } @@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) { char msg[512]; u64 ignore_this_time = 0; - int i; + int i, iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; @@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) } if (supp_msgs == 250000) { + int s_iserr; /* * It's not entirely reasonable assuming that the errors set * in the last clear period are all responsible for the @@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) dd->ipath_maskederrs |= dd->ipath_lasterror | errs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ~dd->ipath_maskederrs); - ipath_decode_err(msg, sizeof msg, + s_iserr = ipath_decode_err(msg, sizeof msg, (dd->ipath_maskederrs & ~dd-> ipath_ignorederrs)); if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) - ipath_dev_err(dd, "Disabling error(s) %llx because " - "occurring too frequently (%s)\n", - (unsigned long long) - (dd->ipath_maskederrs & - ~dd->ipath_ignorederrs), msg); + ~(INFINIPATH_E_RRCVEGRFULL | + INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) + ipath_dev_err(dd, "Temporarily disabling " + "error(s) %llx reporting; too frequent (%s)\n", + (unsigned long long) (dd->ipath_maskederrs & + ~dd->ipath_ignorederrs), msg); else { /* * rcvegrfull and rcvhdrqfull are "normal", @@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * processing them. So only complain about * these at debug level. 
*/ - ipath_dbg("Disabling frequent queue full errors " - "(%s)\n", msg); + if (s_iserr) + ipath_dbg("Temporarily disabling reporting " + "too frequent queue full errors (%s)\n", + msg); + else + ipath_cdbg(ERRPKT, + "Temporarily disabling reporting too" + " frequent packet errors (%s)\n", + msg); } /* @@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ipath_stats.sps_crcerrs++; chkerrpkts = 1; } + iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS); + /* * We don't want to print these two as they happen, or we can make @@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; } - if (!noprint && *msg) - ipath_dev_err(dd, "%s error\n", msg); + if (!noprint && *msg) { + if (iserr) + ipath_dev_err(dd, "%s error\n", msg); + else + dev_info(&dd->pcidev->dev, "%s packet problems\n", + msg); + } if (dd->ipath_state_wanted & dd->ipath_flags) { ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " "waking\n", dd->ipath_state_wanted, @@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) struct ipath_portdata *pd = dd->ipath_pd[i]; if (portr & (1 << i) && pd && pd->port_cnt && test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - int rcbit; clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; - clear_bit(1UL << rcbit, &dd->ipath_rcvctrl); + clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, + &dd->ipath_rcvctrl); wake_up_interruptible(&pd->port_wait); rcvdint = 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 6d8d05fb599..e900c2593f4 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); -void ipath_disarm_senderrbufs(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, @@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); extern int ipath_diag_inuse; irqreturn_t ipath_intr(int irq, void *devid); -void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); +int ipath_decode_err(char *buf, size_t blen, ipath_err_t err); #if __IPATH_INFO || __IPATH_DBG extern const char *ipath_ibcstatus_str[]; #endif @@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); #define IPATH_PORT_WAITING_RCV 2 /* waiting for a PIO buffer to be available */ #define IPATH_PORT_WAITING_PIO 3 + /* master has not finished initializing */ +#define IPATH_PORT_MASTER_UNINIT 4 /* free up any allocated data at closes */ void ipath_free_data(struct ipath_portdata *dd); @@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); +void ipath_disarm_senderrbufs(struct ipath_devdata *, int); /* * number of words used for protocol header if not set by ipath_userinit(); @@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int); /* these are used for the registers that vary with port */ void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, unsigned, 
u64); -u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg, - unsigned); /* * We could have a single register get/put routine, that takes a group type, @@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); extern unsigned ipath_debug; /* debugging bit mask */ +#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */ + const char *ipath_get_unit_name(int unit); extern struct mutex ipath_mutex; diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 851763d7d2d..dd487c100f5 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) r = (r + 1) & (rkt->max - 1); if (r == n) { spin_unlock_irqrestore(&rkt->lock, flags); - ipath_dbg(KERN_INFO "LKEY table full\n"); + ipath_dbg("LKEY table full\n"); ret = 0; goto bail; } @@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, * being reversible by calling bus_to_virt(). */ if (sge->lkey == 0) { + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } isge->mr = NULL; isge->vaddr = (void *) sge->addr; isge->length = sge->length; @@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, * (see ipath_get_dma_mr and ipath_dma.c). */ if (rkey == 0) { + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } sge->mr = NULL; sge->vaddr = (void *) vaddr; sge->length = len; diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index e46aa4ed2a7..05a1d2b01d9 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -37,7 +37,6 @@ */ #include <linux/io.h> -#include <linux/pci.h> #include <asm/byteorder.h> #include "ipath_kernel.h" diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c index a82157db468..937bc3396b5 100644 --- a/drivers/infiniband/hw/ipath/ipath_mmap.c +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -46,6 +46,11 @@ void ipath_release_mmap_info(struct kref *ref) { struct ipath_mmap_info *ip = container_of(ref, struct ipath_mmap_info, ref); + struct ipath_ibdev *dev = to_idev(ip->context->device); + + spin_lock_irq(&dev->pending_lock); + list_del(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); vfree(ip->obj); kfree(ip); @@ -60,14 +65,12 @@ static void ipath_vma_open(struct vm_area_struct *vma) struct ipath_mmap_info *ip = vma->vm_private_data; kref_get(&ip->ref); - ip->mmap_cnt++; } static void ipath_vma_close(struct vm_area_struct *vma) { struct ipath_mmap_info *ip = vma->vm_private_data; - ip->mmap_cnt--; kref_put(&ip->ref, ipath_release_mmap_info); } @@ -87,7 +90,7 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) struct ipath_ibdev *dev = to_idev(context->device); unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - vma->vm_start; - struct ipath_mmap_info *ip, **pp; + struct ipath_mmap_info *ip, *pp; int ret = -EINVAL; /* @@ -96,15 +99,16 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) * CQ, QP, or SRQ is soon followed by a call to mmap(). 
*/ spin_lock_irq(&dev->pending_lock); - for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) { + list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, + pending_mmaps) { /* Only the creator is allowed to mmap the object */ - if (context != ip->context || (void *) offset != ip->obj) + if (context != ip->context || (__u64) offset != ip->offset) continue; /* Don't allow a mmap larger than the object. */ if (size > ip->size) break; - *pp = ip->next; + list_del_init(&ip->pending_mmaps); spin_unlock_irq(&dev->pending_lock); ret = remap_vmalloc_range(vma, ip->obj, 0); @@ -119,3 +123,51 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) done: return ret; } + +/* + * Allocate information for ipath_mmap + */ +struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj) { + struct ipath_mmap_info *ip; + + ip = kmalloc(sizeof *ip, GFP_KERNEL); + if (!ip) + goto bail; + + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + +bail: + return ip; +} + +void ipath_update_mmap_info(struct ipath_ibdev *dev, + struct ipath_mmap_info *ip, + u32 size, void *obj) { + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + ip->size = size; + ip->obj = obj; +} diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 8cc8598d6c6..31e70732e36 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, m = 0; n = 0; list_for_each_entry(chunk, ®ion->chunk_list, list) { - for (i = 0; i < chunk->nmap; i++) { - mr->mr.map[m]->segs[n].vaddr = - page_address(chunk->page_list[i].page); + for (i = 0; i < chunk->nents; i++) { + void *vaddr; + + vaddr = page_address(chunk->page_list[i].page); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = region->page_size; n++; if (n == IPATH_SEGSZ) { diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 64f07b19349..bfef08ecd34 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -81,11 +81,51 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static u32 alloc_qpn(struct ipath_qp_table *qpt) + +static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long flags; + + /* + * Free the page if someone raced with us installing it. 
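/*
 * Illustrative sketch, not part of the patch: ipath_create_mmap_info()
 * above hands every mmap-able object a unique, page-aligned pseudo-offset
 * that userspace later passes back as the mmap() offset. The allocator is
 * a locked bump pointer that never hands out 0 (PAGE_SZ assumed 4096):
 */
#include <pthread.h>
#include <stdint.h>

#define PAGE_SZ 4096u

static pthread_mutex_t mmap_offset_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t mmap_offset;

static uint64_t alloc_mmap_offset(uint64_t size)
{
	uint64_t off;

	size = (size + PAGE_SZ - 1) & ~(uint64_t) (PAGE_SZ - 1); /* PAGE_ALIGN */

	pthread_mutex_lock(&mmap_offset_lock);
	if (mmap_offset == 0)
		mmap_offset = PAGE_SZ;		/* offset 0 means "nothing" */
	off = mmap_offset;
	mmap_offset += size;
	pthread_mutex_unlock(&mmap_offset_lock);

	return off;
}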
+ */ + + spin_lock_irqsave(&qpt->lock, flags); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock_irqrestore(&qpt->lock, flags); +} + + +static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type) { u32 i, offset, max_scan, qpn; struct qpn_map *map; - u32 ret; + u32 ret = -1; + + if (type == IB_QPT_SMI) + ret = 0; + else if (type == IB_QPT_GSI) + ret = 1; + + if (ret != -1) { + map = &qpt->map[0]; + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) { + ret = -ENOMEM; + goto bail; + } + } + if (!test_and_set_bit(ret, map->page)) + atomic_dec(&map->n_free); + else + ret = -EBUSY; + goto bail; + } qpn = qpt->last + 1; if (qpn >= QPN_MAX) @@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - unsigned long page = get_zeroed_page(GFP_KERNEL); - unsigned long flags; - - /* - * Free the page if someone raced with us - * installing it: - */ - spin_lock_irqsave(&qpt->lock, flags); - if (map->page) - free_page(page); - else - map->page = (void *)page; - spin_unlock_irqrestore(&qpt->lock, flags); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) qpn = mk_qpn(qpt, map, offset); } - ret = 0; + ret = -ENOMEM; bail: return ret; @@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, enum ib_qp_type type) { unsigned long flags; - u32 qpn; int ret; - if (type == IB_QPT_SMI) - qpn = 0; - else if (type == IB_QPT_GSI) - qpn = 1; - else { - /* Allocate the next available QPN */ - qpn = alloc_qpn(qpt); - if (qpn == 0) { - ret = -ENOMEM; - goto bail; - } - } - qp->ibqp.qp_num = qpn; + ret = alloc_qpn(qpt, type); + if (ret < 0) + goto bail; + qp->ibqp.qp_num = ret; /* Add the QP to the hash table. */ spin_lock_irqsave(&qpt->lock, flags); - qpn %= qpt->max; - qp->next = qpt->table[qpn]; - qpt->table[qpn] = qp; + ret %= qpt->max; + qp->next = qpt->table[ret]; + qpt->table[ret] = qp; atomic_inc(&qp->refcount); spin_unlock_irqrestore(&qpt->lock, flags); @@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) if (!fnd) return; - /* If QPN is not reserved, mark QPN free in the bitmap. 
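/*
 * Illustrative sketch, not part of the patch: alloc_qpn() above now
 * returns either a QPN (>= 0) or a negative errno, with QPN 0 and 1
 * reserved for the SMI and GSI special QPs. A reduced version over one
 * static bitmap; the driver's locking and on-demand map-page allocation
 * are omitted:
 */
#include <errno.h>
#include <limits.h>

#define QPN_MAX_SKETCH 1024
#define BITS_PER_LONG_SKETCH (sizeof(unsigned long) * CHAR_BIT)

static unsigned long qpn_map[QPN_MAX_SKETCH / BITS_PER_LONG_SKETCH];

static int test_and_set_qpn(unsigned n)
{
	unsigned long bit = 1UL << (n % BITS_PER_LONG_SKETCH);
	unsigned long *w = &qpn_map[n / BITS_PER_LONG_SKETCH];
	int was_set = (*w & bit) != 0;

	*w |= bit;
	return was_set;
}

/* wanted 0 -> SMI QPN, wanted 1 -> GSI QPN, anything else -> any free QPN */
static int alloc_qpn_sketch(int wanted)
{
	unsigned n;

	if (wanted == 0 || wanted == 1)
		return test_and_set_qpn(wanted) ? -EBUSY : wanted;

	for (n = 2; n < QPN_MAX_SKETCH; n++)
		if (!test_and_set_qpn(n))
			return (int) n;
	return -ENOMEM;
}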
*/ - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); + free_qpn(qpt, qp->ibqp.qp_num); wait_event(qp->wait, !atomic_read(&qp->refcount)); } @@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt) while (qp) { nqp = qp->next; - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); + free_qpn(qpt, qp->ibqp.qp_num); if (!atomic_dec_and_test(&qp->refcount) || !ipath_destroy_qp(&qp->ibqp)) - ipath_dbg(KERN_INFO "QP memory leak!\n"); + ipath_dbg("QP memory leak!\n"); qp = nqp; } } @@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - clear_bit(IPATH_S_BUSY, &qp->s_flags); + qp->s_busy = 0; + qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_psn = 0; qp->r_psn = 0; @@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->r_state = IB_OPCODE_UC_SEND_LAST; } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; qp->r_wrid_valid = 0; qp->s_rnr_timeout = 0; @@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_ssn = 1; qp->s_lsn = 0; qp->s_wait_credit = 0; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; if (qp->r_rq.wq) { qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; @@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. - * QP s_lock should be held and interrupts disabled. + * The QP s_lock should be held and interrupts disabled. */ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) @@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; - ipath_dbg(KERN_INFO "QP%d/%d in error state\n", + ipath_dbg("QP%d/%d in error state\n", qp->ibqp.qp_num, qp->remote_qpn); spin_lock(&dev->pending_lock); @@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) wc.port_num = 0; if (qp->r_wrid_valid) { qp->r_wrid_valid = 0; + wc.wr_id = qp->r_wr_id; + wc.opcode = IB_WC_RECV; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); } @@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->path_mig_state != IB_MIG_REARM) goto inval; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) + goto inval; + switch (new_state) { case IB_QPS_RESET: ipath_reset_qp(qp); break; case IB_QPS_ERR: - ipath_error_qp(qp, IB_WC_GENERAL_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: @@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + qp->state = new_state; spin_unlock_irqrestore(&qp->s_lock, flags); @@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; attr->sq_draining = 0; - attr->max_rd_atomic = 1; - attr->max_dest_rd_atomic = 1; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->min_rnr_timer = qp->r_min_rnr_timer; attr->port_num = 1; attr->timeout = qp->timeout; @@ 
-614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) + if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; else init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; @@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; + qp->s_flags = IPATH_S_SIGNAL_REQ_WR; else qp->s_flags = 0; dev = to_idev(ibpd->device); @@ -813,34 +844,36 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - struct ipath_mmap_info *ip; - __u64 offset = (__u64) qp->r_rq.wq; int err; - err = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (err) { - ret = ERR_PTR(err); - goto bail_rwq; - } + if (!qp->r_rq.wq) { + __u64 offset = 0; - if (qp->r_rq.wq) { - /* Allocate info for ipath_mmap(). */ - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) { + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_rwq; + } + } else { + u32 s = sizeof(struct ipath_rwq) + + qp->r_rq.size * sz; + + qp->ip = + ipath_create_mmap_info(dev, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { ret = ERR_PTR(-ENOMEM); goto bail_rwq; } - qp->ip = ip; - ip->context = ibpd->uobject->context; - ip->obj = qp->r_rq.wq; - kref_init(&ip->ref); - ip->mmap_cnt = 0; - ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + - qp->r_rq.size * sz); - spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; - spin_unlock_irq(&dev->pending_lock); + + err = ib_copy_to_udata(udata, &(qp->ip->offset), + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } } } @@ -854,6 +887,12 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, dev->n_qps_allocated++; spin_unlock(&dev->n_qps_lock); + if (qp->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + ret = &qp->ibqp; goto bail; @@ -958,7 +997,7 @@ bail: * @wc: the WC responsible for putting the QP in this state * * Flushes the send work queue. - * The QP s_lock should be held. + * The QP s_lock should be held and interrupts disabled. 
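/*
 * Illustrative sketch, not part of the patch: the __u64 copied out through
 * ib_copy_to_udata() above is meant to be handed straight back by the user
 * library as the mmap() offset (0 meaning there is nothing to map, e.g. a
 * QP using an SRQ). Assumed userspace usage; cmd_fd stands in for the
 * verbs device file descriptor:
 */
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

static void *map_recv_queue(int cmd_fd, uint64_t offset, size_t size)
{
	void *p;

	if (!offset)			/* kernel said: nothing to mmap */
		return NULL;
	p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		 cmd_fd, (off_t) offset);
	return p == MAP_FAILED ? NULL : p;
}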
*/ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) @@ -966,7 +1005,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", + ipath_dbg("Send queue error on QP%d/%d: err: %d\n", qp->ibqp.qp_num, qp->remote_qpn, wc->status); spin_lock(&dev->pending_lock); @@ -984,12 +1023,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) wc->status = IB_WC_WR_FLUSH_ERR; while (qp->s_last != qp->s_head) { + wqe = get_swqe_ptr(qp, qp->s_last); wc->wr_id = wqe->wr.wr_id; wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); if (++qp->s_last >= qp->s_size) qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); } qp->s_cur = qp->s_tail = qp->s_head; qp->state = IB_QPS_SQE; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5ff20cb0449..1915771fd03 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -37,6 +37,19 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x +static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, + u32 psn, u32 pmtu) +{ + u32 len; + + len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; + ss->sge = wqe->sg_list[0]; + ss->sg_list = wqe->sg_list + 1; + ss->num_sge = wqe->wr.num_sge; + ipath_skip_sge(ss, len); + return wqe->length - len; +} + /** * ipath_init_restart- initialize the qp->s_sge after a restart * @qp: the QP who's SGE we're restarting @@ -47,15 +60,9 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) { struct ipath_ibdev *dev; - u32 len; - len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - ipath_skip_sge(&qp->s_sge, len); - qp->s_len = wqe->length - len; + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, + ib_mtu_enum_to_int(qp->path_mtu)); dev = to_idev(qp->ibqp.device); spin_lock(&dev->pending_lock); if (list_empty(&qp->timerwait)) @@ -70,45 +77,81 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) * @ohdr: a pointer to the IB header being constructed * @pmtu: the path MTU * - * Return bth0 if constructed; otherwise, return 0. + * Return 1 if constructed; otherwise, return 0. + * Note that we are in the responder's side of the QP context. * Note the QP s_lock must be held. */ -u32 ipath_make_rc_ack(struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 pmtu) +static int ipath_make_rc_ack(struct ipath_qp *qp, + struct ipath_other_headers *ohdr, + u32 pmtu, u32 *bth0p, u32 *bth2p) { + struct ipath_ack_entry *e; u32 hwords; u32 len; u32 bth0; + u32 bth2; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; - /* - * Send a response. Note that we are in the responder's - * side of the QP context. 
- */ switch (qp->s_ack_state) { - case OP(RDMA_READ_REQUEST): - qp->s_cur_sge = &qp->s_rdma_sge; - len = qp->s_rdma_len; - if (len > pmtu) { - len = pmtu; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else - qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); - qp->s_rdma_len -= len; + case OP(RDMA_READ_RESPONSE_LAST): + case OP(RDMA_READ_RESPONSE_ONLY): + case OP(ATOMIC_ACKNOWLEDGE): + /* + * We can increment the tail pointer now that the last + * response has been sent instead of only being + * constructed. + */ + if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + /* FALLTHROUGH */ + case OP(SEND_ONLY): + case OP(ACKNOWLEDGE): + /* Check for no next entry in the queue. */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { + if (qp->s_flags & IPATH_S_ACK_PENDING) + goto normal; + qp->s_ack_state = OP(ACKNOWLEDGE); + goto bail; + } + + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST)) { + /* Copy SGE state in case we need to resend */ + qp->s_ack_rdma_sge = e->rdma_sge; + qp->s_cur_sge = &qp->s_ack_rdma_sge; + len = e->rdma_sge.sge.sge_length; + if (len > pmtu) { + len = pmtu; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); + } else + qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + ohdr->u.aeth = ipath_compute_aeth(qp); + hwords++; + qp->s_ack_rdma_psn = e->psn; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; + } else { + /* COMPARE_SWAP or FETCH_ADD */ + qp->s_cur_sge = NULL; + len = 0; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + ohdr->u.at.aeth = ipath_compute_aeth(qp); + ohdr->u.at.atomic_ack_eth[0] = + cpu_to_be32(e->atomic_data >> 32); + ohdr->u.at.atomic_ack_eth[1] = + cpu_to_be32(e->atomic_data); + hwords += sizeof(ohdr->u.at) / sizeof(u32); + bth2 = e->psn; + } bth0 = qp->s_ack_state << 24; - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; break; case OP(RDMA_READ_RESPONSE_FIRST): qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): - qp->s_cur_sge = &qp->s_rdma_sge; - len = qp->s_rdma_len; + len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; else { @@ -116,61 +159,41 @@ u32 ipath_make_rc_ack(struct ipath_qp *qp, hwords++; qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); } - qp->s_rdma_len -= len; bth0 = qp->s_ack_state << 24; - break; - - case OP(RDMA_READ_RESPONSE_LAST): - case OP(RDMA_READ_RESPONSE_ONLY): - /* - * We have to prevent new requests from changing - * the r_sge state while a ipath_verbs_send() - * is in progress. - */ - qp->s_ack_state = OP(ACKNOWLEDGE); - bth0 = 0; - goto bail; - - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): - qp->s_cur_sge = NULL; - len = 0; - /* - * Set the s_ack_state so the receive interrupt handler - * won't try to send an ACK (out of order) until this one - * is actually sent. - */ - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24; - ohdr->u.at.aeth = ipath_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); - hwords += sizeof(ohdr->u.at) / 4; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; break; default: - /* Send a regular ACK. */ - qp->s_cur_sge = NULL; - len = 0; + normal: /* - * Set the s_ack_state so the receive interrupt handler - * won't try to send an ACK (out of order) until this one - * is actually sent. + * Send a regular ACK. + * Set the s_ack_state so we wait until after sending + * the ACK before setting s_ack_state to ACKNOWLEDGE + * (see above). 
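/*
 * Illustrative sketch, not part of the patch: responses are now pulled
 * from a small ring of s_ack_queue entries, with r_head_ack_queue and
 * s_tail_ack_queue wrapping after IPATH_MAX_RDMA_ATOMIC + 1 slots. The
 * index arithmetic, with the limit value assumed here for illustration:
 */
#define MAX_RDMA_ATOMIC_SKETCH 4	/* assumed stand-in for IPATH_MAX_RDMA_ATOMIC */

static unsigned ack_queue_next(unsigned idx)
{
	/* mirrors: if (++idx > IPATH_MAX_RDMA_ATOMIC) idx = 0; */
	return ++idx > MAX_RDMA_ATOMIC_SKETCH ? 0 : idx;
}

static int ack_queue_empty(unsigned head, unsigned tail)
{
	return head == tail;	/* nothing left to respond to */
}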
*/ - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - bth0 = OP(ACKNOWLEDGE) << 24; + qp->s_ack_state = OP(SEND_ONLY); + qp->s_flags &= ~IPATH_S_ACK_PENDING; + qp->s_cur_sge = NULL; if (qp->s_nak_state) - ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | - (qp->s_nak_state << - IPATH_AETH_CREDIT_SHIFT)); + ohdr->u.aeth = + cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | + (qp->s_nak_state << + IPATH_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; + len = 0; + bth0 = OP(ACKNOWLEDGE) << 24; + bth2 = qp->s_ack_psn & IPATH_PSN_MASK; } qp->s_hdrwords = hwords; qp->s_cur_size = len; + *bth0p = bth0; + *bth2p = bth2; + return 1; bail: - return bth0; + return 0; } /** @@ -197,20 +220,22 @@ int ipath_make_rc_req(struct ipath_qp *qp, u32 bth2; char newreq; - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout) + /* Sending responses has higher priority over sending requests. */ + if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || + (qp->s_flags & IPATH_S_ACK_PENDING) || + qp->s_ack_state != OP(ACKNOWLEDGE)) && + ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) goto done; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || + qp->s_rnr_timeout || qp->s_wait_credit) + goto bail; + /* Limit the number of packets sent without an ACK. */ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { qp->s_wait_credit = 1; dev->n_rc_stalls++; - spin_lock(&dev->pending_lock); - if (list_empty(&qp->timerwait)) - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); - spin_unlock(&dev->pending_lock); - goto done; + goto bail; } /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -232,7 +257,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, if (qp->s_cur == qp->s_tail) { /* Check if send work queue is empty. */ if (qp->s_tail == qp->s_head) - goto done; + goto bail; + /* + * If a fence is requested, wait for previous + * RDMA read and atomic operations to finish. + */ + if ((wqe->wr.send_flags & IB_SEND_FENCE) && + qp->s_num_rd_atomic) { + qp->s_flags |= IPATH_S_FENCE_PENDING; + goto bail; + } wqe->psn = qp->s_next_psn; newreq = 1; } @@ -250,7 +284,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) - goto done; + goto bail; wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -281,13 +315,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) - goto done; + goto bail; ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->wr.wr.rdma.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); - hwords += sizeof(struct ib_reth) / 4; + hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -312,14 +346,17 @@ int ipath_make_rc_req(struct ipath_qp *qp, break; case IB_WR_RDMA_READ: - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / 4; + /* + * Don't allow more operations to be started + * than the QP limits allow. 
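/*
 * Illustrative sketch, not part of the patch: two gates are involved here.
 * A fenced work request (seen above) waits until all outstanding RDMA READ
 * and atomic operations have drained, and the check that follows lets a
 * new RDMA READ or atomic start only while s_num_rd_atomic is below the
 * negotiated s_max_rd_atomic. As plain predicates:
 */
#include <stdbool.h>

static bool fence_satisfied(unsigned outstanding_rd_atomic, bool fenced)
{
	return !fenced || outstanding_rd_atomic == 0;
}

static bool may_start_rd_atomic(unsigned outstanding_rd_atomic, unsigned max)
{
	return outstanding_rd_atomic < max;	/* ~ s_num_rd_atomic < s_max_rd_atomic */
}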
+ */ if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; if (qp->s_lsn != (u32) -1) qp->s_lsn++; /* @@ -330,6 +367,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_next_psn += (len - 1) / pmtu; wqe->lpsn = qp->s_next_psn++; } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); ss = NULL; len = 0; if (++qp->s_cur == qp->s_size) @@ -338,32 +382,48 @@ int ipath_make_rc_req(struct ipath_qp *qp, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) - qp->s_state = OP(COMPARE_SWAP); - else - qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.vaddr = cpu_to_be64( - wqe->wr.wr.atomic.remote_addr); - ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); - hwords += sizeof(struct ib_atomic_eth) / 4; + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; if (qp->s_lsn != (u32) -1) qp->s_lsn++; wqe->lpsn = wqe->psn; } - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + qp->s_state = OP(COMPARE_SWAP); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.swap); + ohdr->u.atomic_eth.compare_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + } else { + qp->s_state = OP(FETCH_ADD); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + ohdr->u.atomic_eth.compare_data = 0; + } + ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr >> 32); + ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr); + ohdr->u.atomic_eth.rkey = cpu_to_be32( + wqe->wr.wr.atomic.rkey); + hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; break; default: - goto done; + goto bail; } qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; @@ -379,7 +439,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_psn = wqe->lpsn + 1; else { qp->s_psn++; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; } /* @@ -406,7 +466,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; @@ -442,7 +502,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; @@ -479,9 +539,9 @@ int ipath_make_rc_req(struct ipath_qp *qp, cpu_to_be32(wqe->wr.wr.rdma.rkey); ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / 4; + hwords += 
sizeof(ohdr->u.rc.reth) / sizeof(u32); bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = NULL; len = 0; @@ -489,20 +549,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, if (qp->s_cur == qp->s_size) qp->s_cur = 0; break; - - case OP(RDMA_READ_REQUEST): - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): - /* - * We shouldn't start anything new until this request is - * finished. The ACK will handle rescheduling us. XXX The - * number of outstanding ones is negotiated at connection - * setup time (see pg. 258,289)? XXX Also, if we support - * multiple outstanding requests, we need to check the WQE - * IB_SEND_FENCE flag and not send a new request if a RDMA - * read or atomic is pending. - */ - goto done; } if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) bth2 |= 1 << 31; /* Request ACK. */ @@ -512,9 +558,10 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_cur_size = len; *bth0p = bth0 | (qp->s_state << 24); *bth2p = bth2; +done: return 1; -done: +bail: return 0; } @@ -524,7 +571,8 @@ done: * * This is called from ipath_rc_rcv() and only uses the receive * side QP state. - * Note that RDMA reads are handled in the send side QP state and tasklet. + * Note that RDMA reads and atomics are handled in the + * send side QP state and tasklet. */ static void send_rc_ack(struct ipath_qp *qp) { @@ -534,6 +582,13 @@ static void send_rc_ack(struct ipath_qp *qp) u32 hwords; struct ipath_ib_header hdr; struct ipath_other_headers *ohdr; + unsigned long flags; + + /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ + if (qp->r_head_ack_queue != qp->s_tail_ack_queue || + (qp->s_flags & IPATH_S_ACK_PENDING) || + qp->s_ack_state != OP(ACKNOWLEDGE)) + goto queue_ack; /* Construct the header. */ ohdr = &hdr.u.oth; @@ -548,19 +603,14 @@ static void send_rc_ack(struct ipath_qp *qp) lrh0 = IPATH_LRH_GRH; } /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); + bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | + OP(ACKNOWLEDGE) << 24; if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << IPATH_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = ipath_compute_aeth(qp); - if (qp->r_ack_state >= OP(COMPARE_SWAP)) { - bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24; - ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); - hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; - } else - bth0 |= OP(ACKNOWLEDGE) << 24; lrh0 |= qp->remote_ah_attr.sl << 4; hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); @@ -574,31 +624,31 @@ static void send_rc_ack(struct ipath_qp *qp) * If we can send the ACK, clear the ACK state. */ if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { - qp->r_ack_state = OP(ACKNOWLEDGE); dev->n_unicast_xmit++; - } else { - /* - * We are out of PIO buffers at the moment. - * Pass responsibility for sending the ACK to the - * send tasklet so that when a PIO buffer becomes - * available, the ACK is sent ahead of other outgoing - * packets. - */ - dev->n_rc_qacks++; - spin_lock_irq(&qp->s_lock); - /* Don't coalesce if a RDMA read or atomic is pending. 
*/ - if (qp->s_ack_state == OP(ACKNOWLEDGE) || - qp->s_ack_state < OP(RDMA_READ_REQUEST)) { - qp->s_ack_state = qp->r_ack_state; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; - qp->r_ack_state = OP(ACKNOWLEDGE); - } - spin_unlock_irq(&qp->s_lock); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + goto done; } + + /* + * We are out of PIO buffers at the moment. + * Pass responsibility for sending the ACK to the + * send tasklet so that when a PIO buffer becomes + * available, the ACK is sent ahead of other outgoing + * packets. + */ + dev->n_rc_qacks++; + +queue_ack: + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Call ipath_do_rc_send() in another thread. */ + tasklet_hi_schedule(&qp->s_task); + +done: + return; } /** @@ -727,7 +777,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) if (wqe->wr.opcode == IB_WR_RDMA_READ) dev->n_rc_resends++; else - dev->n_rc_resends += (int)qp->s_psn - (int)psn; + dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); tasklet_hi_schedule(&qp->s_task); @@ -775,10 +825,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_del_init(&qp->timerwait); spin_unlock(&dev->pending_lock); - /* Nothing is pending to ACK/NAK. */ - if (unlikely(qp->s_last == qp->s_tail)) - goto bail; - /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -806,7 +852,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && (opcode != OP(RDMA_READ_RESPONSE_LAST) || - ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || + ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || @@ -824,20 +870,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ goto bail; } - if (wqe->wr.opcode == IB_WR_RDMA_READ || - wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - tasklet_hi_schedule(&qp->s_task); + if (qp->s_num_rd_atomic && + (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { + qp->s_num_rd_atomic--; + /* Restart sending task if fence is complete */ + if ((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) { + qp->s_flags &= ~IPATH_S_FENCE_PENDING; + tasklet_hi_schedule(&qp->s_task); + } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { + qp->s_flags &= ~IPATH_S_RDMAR_PENDING; + tasklet_hi_schedule(&qp->s_task); + } + } /* Post a send completion queue entry if requested. 
*/ - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.vendor_err = 0; wc.byte_len = wqe->length; + wc.imm_data = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; + wc.wc_flags = 0; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; @@ -854,15 +913,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) if (qp->s_last == qp->s_cur) { if (++qp->s_cur >= qp->s_size) qp->s_cur = 0; + qp->s_last = qp->s_cur; + if (qp->s_last == qp->s_tail) + break; wqe = get_swqe_ptr(qp, qp->s_cur); qp->s_state = OP(SEND_LAST); qp->s_psn = wqe->psn; + } else { + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + if (qp->s_last == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, qp->s_last); } - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); - if (qp->s_last == qp->s_tail) - break; } switch (aeth >> 29) { @@ -874,6 +937,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_add_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); + /* + * If we get a partial ACK for a resent operation, + * we can stop resending the earlier packets and + * continue with the next packet the receiver wants. + */ + if (ipath_cmp24(qp->s_psn, psn) <= 0) { + reset_psn(qp, psn + 1); + tasklet_hi_schedule(&qp->s_task); + } + } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = psn + 1; } ipath_get_credit(qp, aeth); qp->s_rnr_retry = qp->s_rnr_retry_cnt; @@ -884,22 +959,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) case 1: /* RNR NAK */ dev->n_rnr_naks++; + if (qp->s_last == qp->s_tail) + goto bail; if (qp->s_rnr_retry == 0) { - if (qp->s_last == qp->s_tail) - goto bail; - wc.status = IB_WC_RNR_RETRY_EXC_ERR; goto class_b; } if (qp->s_rnr_retry_cnt < 7) qp->s_rnr_retry--; - if (qp->s_last == qp->s_tail) - goto bail; /* The last valid PSN is the previous PSN. */ update_last_psn(qp, psn - 1); - dev->n_rc_resends += (int)qp->s_psn - (int)psn; + if (wqe->wr.opcode == IB_WR_RDMA_READ) + dev->n_rc_resends++; + else + dev->n_rc_resends += + (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); @@ -910,26 +986,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) goto bail; case 3: /* NAK */ - /* The last valid PSN seen is the previous request's. */ - if (qp->s_last != qp->s_tail) - update_last_psn(qp, wqe->psn - 1); + if (qp->s_last == qp->s_tail) + goto bail; + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ dev->n_seq_naks++; /* - * Back up to the responder's expected PSN. XXX + * Back up to the responder's expected PSN. * Note that we might get a NAK in the middle of an * RDMA READ response which terminates the RDMA * READ. */ - if (qp->s_last == qp->s_tail) - break; - - if (ipath_cmp24(psn, wqe->psn) < 0) - break; - - /* Retry the request. 
*/ ipath_restart_rc(qp, psn, &wc); break; @@ -1003,6 +1073,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, u32 psn, u32 hdrsize, u32 pmtu, int header_in_data) { + struct ipath_swqe *wqe; unsigned long flags; struct ib_wc wc; int diff; @@ -1032,6 +1103,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto ack_done; } + if (unlikely(qp->s_last == qp->s_tail)) + goto ack_done; + wqe = get_swqe_ptr(qp, qp->s_last); + switch (opcode) { case OP(ACKNOWLEDGE): case OP(ATOMIC_ACKNOWLEDGE): @@ -1042,38 +1117,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, aeth = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) - *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { + u64 val; + + if (!header_in_data) { + __be32 *p = ohdr->u.at.atomic_ack_eth; + + val = ((u64) be32_to_cpu(p[0]) << 32) | + be32_to_cpu(p[1]); + } else + val = be64_to_cpu(((__be64 *) data)[0]); + *(u64 *) wqe->sg_list[0].vaddr = val; + } if (!do_rc_ack(qp, aeth, psn, opcode) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; hdrsize += 4; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; /* - * do_rc_ack() has already checked the PSN so skip - * the sequence check. + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. */ - goto rdma_read; + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_middle; case OP(RDMA_READ_RESPONSE_MIDDLE): /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - if (qp->s_last != qp->s_tail) - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } - rdma_read: - if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) - goto ack_done; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + read_middle: if (unlikely(tlen != (hdrsize + pmtu + 4))) - goto ack_done; - if (unlikely(pmtu >= qp->s_len)) - goto ack_done; + goto ack_len_err; + if (unlikely(pmtu >= qp->s_rdma_read_len)) + goto ack_len_err; + /* We got a response so update the timeout. */ - if (unlikely(qp->s_last == qp->s_tail || - get_swqe_ptr(qp, qp->s_last)->wr.opcode != - IB_WR_RDMA_READ)) - goto ack_done; spin_lock(&dev->pending_lock); if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) list_move_tail(&qp->timerwait, @@ -1082,67 +1168,98 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* * Update the RDMA receive state but do the copy w/o * holding the locks and blocking interrupts. - * XXX Yet another place that affects relaxed RDMA order - * since we don't want s_sge modified. */ - qp->s_len -= pmtu; + qp->s_rdma_read_len -= pmtu; update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_copy_sge(&qp->s_sge, data, pmtu); + ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); goto bail; - case OP(RDMA_READ_RESPONSE_LAST): - /* ACKs READ req. 
*/ + case OP(RDMA_READ_RESPONSE_ONLY): if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - if (qp->s_last != qp->s_tail) - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } - /* FALLTHROUGH */ - case OP(RDMA_READ_RESPONSE_ONLY): - if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) - goto ack_done; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; /* - * Get the number of bytes the message was padded by. + * Check that the data size is >= 0 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen < (hdrsize + pad + 8))) + goto ack_len_err; + /* + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. */ + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_last; + + case OP(RDMA_READ_RESPONSE_LAST): + /* ACKs READ req. */ + if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { + dev->n_rdma_seq++; + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + goto ack_done; + } + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for the AETH header (4) and * ICRC (4). */ - if (unlikely(tlen <= (hdrsize + pad + 8))) { - /* XXX Need to generate an error CQ entry. */ - goto ack_done; - } + if (unlikely(tlen <= (hdrsize + pad + 8))) + goto ack_len_err; + read_last: tlen -= hdrsize + pad + 8; - if (unlikely(tlen != qp->s_len)) { - /* XXX Need to generate an error CQ entry. */ - goto ack_done; - } + if (unlikely(tlen != qp->s_rdma_read_len)) + goto ack_len_err; if (!header_in_data) aeth = be32_to_cpu(ohdr->u.aeth); else { aeth = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } - ipath_copy_sge(&qp->s_sge, data, tlen); - if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { - /* - * Change the state so we contimue - * processing new requests and wake up the - * tasklet if there are posted sends. - */ - qp->s_state = OP(SEND_LAST); - if (qp->s_tail != qp->s_head) - tasklet_hi_schedule(&qp->s_task); - } + ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); + (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); goto ack_done; } ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + +ack_op_err: + wc.status = IB_WC_LOC_QP_OP_ERR; + goto ack_err; + +ack_len_err: + wc.status = IB_WC_LOC_LEN_ERR; +ack_err: + wc.wr_id = wqe->wr.wr_id; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + wc.dlid_path_bits = 0; + wc.port_num = 0; + ipath_sqerror_qp(qp, &wc); + spin_unlock_irqrestore(&qp->s_lock, flags); bail: return; } @@ -1162,7 +1279,7 @@ bail: * incoming RC packet for the given QP. * Called at interrupt level. * Return 1 if no more processing is needed; otherwise return 0 to - * schedule a response to be sent and the s_lock unlocked. + * schedule a response to be sent. 
*/ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, struct ipath_other_headers *ohdr, @@ -1173,25 +1290,24 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, int diff, int header_in_data) { - struct ib_reth *reth; + struct ipath_ack_entry *e; + u8 i, prev; + int old_req; + unsigned long flags; if (diff > 0) { /* * Packet sequence error. * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or - * NAK is pending though. + * Don't queue the NAK if we already sent one. */ - if (qp->s_ack_state != OP(ACKNOWLEDGE) || - qp->r_nak_state != 0) - goto done; - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - qp->r_ack_state = OP(SEND_ONLY); + if (!qp->r_nak_state) { qp->r_nak_state = IB_NAK_PSN_ERROR; /* Use the expected PSN. */ qp->r_ack_psn = qp->r_psn; + goto send_ack; } - goto send_ack; + goto done; } /* @@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, * can coalesce an outstanding duplicate ACK. We have to * send the earliest so that RDMA reads can be restarted at * the requester's expected PSN. + * + * First, find where this duplicate PSN falls within the + * ACKs previously sent. */ - if (opcode == OP(RDMA_READ_REQUEST)) { + psn &= IPATH_PSN_MASK; + e = NULL; + old_req = 1; + spin_lock_irqsave(&qp->s_lock, flags); + for (i = qp->r_head_ack_queue; ; i = prev) { + if (i == qp->s_tail_ack_queue) + old_req = 0; + if (i) + prev = i - 1; + else + prev = IPATH_MAX_RDMA_ATOMIC; + if (prev == qp->r_head_ack_queue) { + e = NULL; + break; + } + e = &qp->s_ack_queue[prev]; + if (!e->opcode) { + e = NULL; + break; + } + if (ipath_cmp24(psn, e->psn) >= 0) + break; + } + switch (opcode) { + case OP(RDMA_READ_REQUEST): { + struct ib_reth *reth; + u32 offset; + u32 len; + + /* + * If we didn't find the RDMA read request in the ack queue, + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. + */ + if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) + goto unlock_done; /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; @@ -1214,96 +1368,96 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, data += sizeof(*reth); } /* - * If we receive a duplicate RDMA request, it means the - * requester saw a sequence error and needs to restart - * from an earlier point. We can abort the current - * RDMA read send in that case. + * Address range must be a subset of the original + * request and start on pmtu boundaries. + * We reuse the old ack_queue slot since the requester + * should not back up and request an earlier PSN for the + * same request. */ - spin_lock_irq(&qp->s_lock); - if (qp->s_ack_state != OP(ACKNOWLEDGE) && - (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { - /* - * We are already sending earlier requested data. - * Don't abort it to send later out of sequence data. - */ - spin_unlock_irq(&qp->s_lock); - goto done; - } - qp->s_rdma_len = be32_to_cpu(reth->length); - if (qp->s_rdma_len != 0) { + offset = ((psn - e->psn) & IPATH_PSN_MASK) * + ib_mtu_enum_to_int(qp->path_mtu); + len = be32_to_cpu(reth->length); + if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) + goto unlock_done; + if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; - /* - * Address range must be a subset of the original - * request and start on pmtu boundaries. 
- */ - ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, - qp->s_rdma_len, vaddr, rkey, + ok = ipath_rkey_ok(qp, &e->rdma_sge, + len, vaddr, rkey, IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) { - spin_unlock_irq(&qp->s_lock); - goto done; - } + if (unlikely(!ok)) + goto unlock_done; } else { - qp->s_rdma_sge.sg_list = NULL; - qp->s_rdma_sge.num_sge = 0; - qp->s_rdma_sge.sge.mr = NULL; - qp->s_rdma_sge.sge.vaddr = NULL; - qp->s_rdma_sge.sge.length = 0; - qp->s_rdma_sge.sge.sge_length = 0; + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; } - qp->s_ack_state = opcode; - qp->s_ack_psn = psn; - spin_unlock_irq(&qp->s_lock); - tasklet_hi_schedule(&qp->s_task); - goto send_ack; + e->psn = psn; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; } - /* - * A pending RDMA read will ACK anything before it so - * ignore earlier duplicate requests. - */ - if (qp->s_ack_state != OP(ACKNOWLEDGE)) - goto done; - - /* - * If an ACK is pending, don't replace the pending ACK - * with an earlier one since the later one will ACK the earlier. - * Also, if we already have a pending atomic, send it. - */ - if (qp->r_ack_state != OP(ACKNOWLEDGE) && - (ipath_cmp24(psn, qp->r_ack_psn) <= 0 || - qp->r_ack_state >= OP(COMPARE_SWAP))) - goto send_ack; - switch (opcode) { case OP(COMPARE_SWAP): - case OP(FETCH_ADD): + case OP(FETCH_ADD): { /* - * Check for the PSN of the last atomic operation - * performed and resend the result if found. + * If we didn't find the atomic request in the ack queue + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. */ - if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) - goto done; + if (!e || e->opcode != (u8) opcode || old_req) + goto unlock_done; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; + } + + default: + if (old_req) + goto unlock_done; + /* + * Resend the most recent ACK if this request is + * after all the previous RDMA reads and atomics. + */ + if (i == qp->r_head_ack_queue) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->r_psn - 1; + goto send_ack; + } + /* + * Resend the RDMA read or atomic op which + * ACKs this duplicate request. + */ + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = i; break; } - qp->r_ack_state = opcode; qp->r_nak_state = 0; - qp->r_ack_psn = psn; -send_ack: - return 0; + tasklet_hi_schedule(&qp->s_task); +unlock_done: + spin_unlock_irqrestore(&qp->s_lock, flags); done: return 1; + +send_ack: + return 0; } static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) { - spin_lock_irq(&qp->s_lock); + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); qp->state = IB_QPS_ERR; ipath_error_qp(qp, err); - spin_unlock_irq(&qp->s_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -1391,15 +1545,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, opcode == OP(SEND_LAST_WITH_IMMEDIATE)) break; nack_inv: - /* - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or NAK - * is pending though. 
- */ - if (qp->r_ack_state >= OP(COMPARE_SWAP)) - goto send_ack; ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); - qp->r_ack_state = OP(SEND_ONLY); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; goto send_ack; @@ -1441,9 +1587,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * Don't queue the NAK if a RDMA read or atomic * is pending though. */ - if (qp->r_ack_state >= OP(COMPARE_SWAP)) - goto send_ack; - qp->r_ack_state = OP(SEND_ONLY); + if (qp->r_nak_state) + goto done; qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; qp->r_ack_psn = qp->r_psn; goto send_ack; @@ -1567,7 +1712,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto rnr_nak; goto send_last_imm; - case OP(RDMA_READ_REQUEST): + case OP(RDMA_READ_REQUEST): { + struct ipath_ack_entry *e; + u32 len; + u8 next; + + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto nack_acc; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + if (unlikely(next == qp->s_tail_ack_queue)) + goto nack_inv; + e = &qp->s_ack_queue[qp->r_head_ack_queue]; /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; @@ -1575,72 +1732,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, reth = (struct ib_reth *)data; data += sizeof(*reth); } - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto nack_acc; - spin_lock_irq(&qp->s_lock); - qp->s_rdma_len = be32_to_cpu(reth->length); - if (qp->s_rdma_len != 0) { + len = be32_to_cpu(reth->length); + if (len) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, - qp->s_rdma_len, vaddr, rkey, - IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) { - spin_unlock_irq(&qp->s_lock); + ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, + rkey, IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) goto nack_acc; - } /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. */ - if (qp->s_rdma_len > pmtu) - qp->r_psn += (qp->s_rdma_len - 1) / pmtu; + if (len > pmtu) + qp->r_psn += (len - 1) / pmtu; } else { - qp->s_rdma_sge.sg_list = NULL; - qp->s_rdma_sge.num_sge = 0; - qp->s_rdma_sge.sge.mr = NULL; - qp->s_rdma_sge.sge.vaddr = NULL; - qp->s_rdma_sge.sge.length = 0; - qp->s_rdma_sge.sge.sge_length = 0; + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; } + e->opcode = opcode; + e->psn = psn; /* * We need to increment the MSN here instead of when we * finish sending the result since a duplicate request would * increment it more than once. */ qp->r_msn++; - - qp->s_ack_state = opcode; - qp->s_ack_psn = psn; - spin_unlock_irq(&qp->s_lock); - qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; + barrier(); + qp->r_head_ack_queue = next; /* Call ipath_do_rc_send() in another thread. 
*/ tasklet_hi_schedule(&qp->s_task); goto done; + } case OP(COMPARE_SWAP): case OP(FETCH_ADD): { struct ib_atomic_eth *ateth; + struct ipath_ack_entry *e; u64 vaddr; + atomic64_t *maddr; u64 sdata; u32 rkey; + u8 next; + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC))) + goto nack_acc; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + if (unlikely(next == qp->s_tail_ack_queue)) + goto nack_inv; if (!header_in_data) ateth = &ohdr->u.atomic_eth; - else { + else ateth = (struct ib_atomic_eth *)data; - data += sizeof(*ateth); - } - vaddr = be64_to_cpu(ateth->vaddr); + vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | + be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv; rkey = be32_to_cpu(ateth->rkey); @@ -1649,63 +1809,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); - spin_lock_irq(&dev->pending_lock); - qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; - if (opcode == OP(FETCH_ADD)) - *(u64 *) qp->r_sge.sge.vaddr = - qp->r_atomic_data + sdata; - else if (qp->r_atomic_data == - be64_to_cpu(ateth->compare_data)) - *(u64 *) qp->r_sge.sge.vaddr = sdata; - spin_unlock_irq(&dev->pending_lock); + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + e->atomic_data = (opcode == OP(FETCH_ADD)) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + be64_to_cpu(ateth->compare_data), + sdata); + e->opcode = opcode; + e->psn = psn & IPATH_PSN_MASK; qp->r_msn++; - qp->r_atomic_psn = psn & IPATH_PSN_MASK; - psn |= 1 << 31; - break; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + barrier(); + qp->r_head_ack_queue = next; + + /* Call ipath_do_rc_send() in another thread. */ + tasklet_hi_schedule(&qp->s_task); + + goto done; } default: - /* Drop packet for unknown opcodes. */ - goto done; + /* NAK unknown opcodes. */ + goto nack_inv; } qp->r_psn++; qp->r_state = opcode; + qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. */ - if (psn & (1 << 31)) { - /* - * Coalesce ACKs unless there is a RDMA READ or - * ATOMIC pending. - */ - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - qp->r_ack_state = opcode; - qp->r_ack_psn = psn; - } + if (psn & (1 << 31)) goto send_ack; - } goto done; nack_acc: - /* - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or NAK - * is pending though. - */ - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); - qp->r_ack_state = OP(RDMA_WRITE_ONLY); - qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; - qp->r_ack_psn = qp->r_psn; - } + ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); + qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; + qp->r_ack_psn = qp->r_psn; + send_ack: - /* Send ACK right away unless the send tasklet has a pending ACK. 
*/ - if (qp->s_ack_state == OP(ACKNOWLEDGE)) - send_rc_ack(qp); + send_rc_ack(qp); done: return; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index dffc76016d3..c182bcd6209 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -126,9 +126,18 @@ #define INFINIPATH_E_RESET 0x0004000000000000ULL #define INFINIPATH_E_HARDWARE 0x0008000000000000ULL +/* + * this is used to print "common" packet errors only when the + * __IPATH_ERRPKTDBG bit is set in ipath_debug. + */ +#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \ + | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \ + | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \ + | INFINIPATH_E_REBP ) + /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo - * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID + * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID * bit 4: flag buffer, 5: datainfo, 6: header info */ #define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 @@ -143,8 +152,8 @@ /* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ #define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL @@ -299,13 +308,6 @@ #define INFINIPATH_XGXS_RX_POL_SHIFT 19 #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL -#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ - -/* TID entries (memory), HT-only */ -#define INFINIPATH_RT_VALID 0x8000000000000000ULL -#define INFINIPATH_RT_ADDR_SHIFT 0 -#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF -#define INFINIPATH_RT_BUFSIZE_SHIFT 48 /* * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index e86cb171872..d9c2a9b15d8 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wq->tail = tail; ret = 1; + qp->r_wrid_valid = 1; if (handler) { u32 n; @@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } } spin_unlock_irqrestore(&rq->lock, flags); - qp->r_wrid_valid = 1; bail: return ret; @@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) unsigned long flags; struct ib_wc wc; u64 sdata; + atomic64_t *maddr; qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); if (!qp) { @@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || + qp->s_rnr_timeout) { spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } @@ -310,7 +312,7 @@ again: sqp->s_rnr_retry--; dev->n_rnr_naks++; sqp->s_rnr_timeout = - ib_ipath_rnr_table[sqp->r_min_rnr_timer]; + ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); goto done; } @@ -343,20 +345,22 @@ 
again: wc.sl = sqp->remote_ah_attr.sl; wc.dlid_path_bits = 0; wc.port_num = 0; + spin_lock_irqsave(&sqp->s_lock, flags); ipath_sqerror_qp(sqp, &wc); + spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } break; case IB_WR_RDMA_READ: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_READ))) + goto acc_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto acc_err; qp->r_sge.sge = wqe->sg_list[0]; qp->r_sge.sg_list = wqe->sg_list + 1; qp->r_sge.num_sge = wqe->wr.num_sge; @@ -364,22 +368,22 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC))) + goto acc_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->wr.wr.atomic.remote_addr, + wqe->wr.wr.atomic.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ - sdata = wqe->wr.wr.atomic.swap; - spin_lock_irqsave(&dev->pending_lock, flags); - qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; - if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - *(u64 *) qp->r_sge.sge.vaddr = - qp->r_atomic_data + sdata; - else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) - *(u64 *) qp->r_sge.sge.vaddr = sdata; - spin_unlock_irqrestore(&dev->pending_lock, flags); - *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data; + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = wqe->wr.wr.atomic.compare_add; + *(u64 *) sqp->s_sge.sge.vaddr = + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + sdata, wqe->wr.wr.atomic.swap); goto send_comp; default: @@ -440,7 +444,7 @@ again: send_comp: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || + if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; @@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) * We clear the tasklet flag now since we are committing to return * from the tasklet function. */ - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); tasklet_unlock(&qp->s_task); want_buffer(dev->dd); dev->n_piowait++; @@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) wr->sg_list[0].addr & (sizeof(u64) - 1))) { ret = -EINVAL; goto bail; + } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { + ret = -EINVAL; + goto bail; } /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) { @@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data) u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); struct ipath_other_headers *ohdr; - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) + if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) goto bail; if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { @@ -683,19 +690,15 @@ again: */ spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ - if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && - (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) - bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK; - else if (!((qp->ibqp.qp_type == IB_QPT_RC) ? 
- ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : - ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { + if (!((qp->ibqp.qp_type == IB_QPT_RC) ? + ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : + ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { /* * Clear the busy bit before unlocking to avoid races with * adding new work queue items and then failing to process * them. */ - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); spin_unlock_irqrestore(&qp->s_lock, flags); goto bail; } @@ -728,7 +731,7 @@ again: goto again; clear: - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); bail: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 94033503400..03acae66ba8 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -139,33 +139,24 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, * See ipath_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - struct ipath_mmap_info *ip; - __u64 offset = (__u64) srq->rq.wq; int err; + u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz; - err = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (err) { - ret = ERR_PTR(err); + srq->ip = + ipath_create_mmap_info(dev, s, + ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); goto bail_wq; } - /* Allocate info for ipath_mmap(). */ - ip = kmalloc(sizeof(*ip), GFP_KERNEL); - if (!ip) { - ret = ERR_PTR(-ENOMEM); - goto bail_wq; + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; } - srq->ip = ip; - ip->context = ibpd->uobject->context; - ip->obj = srq->rq.wq; - kref_init(&ip->ref); - ip->mmap_cnt = 0; - ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + - srq->rq.size * sz); - spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; - spin_unlock_irq(&dev->pending_lock); } else srq->ip = NULL; @@ -181,21 +172,27 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, if (dev->n_srqs_allocated == ib_ipath_max_srqs) { spin_unlock(&dev->n_srqs_lock); ret = ERR_PTR(-ENOMEM); - goto bail_wq; + goto bail_ip; } dev->n_srqs_allocated++; spin_unlock(&dev->n_srqs_lock); + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + ret = &srq->ibsrq; goto done; +bail_ip: + kfree(srq->ip); bail_wq: vfree(srq->rq.wq); - bail_srq: kfree(srq); - done: return ret; } @@ -312,13 +309,13 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (srq->ip) { struct ipath_mmap_info *ip = srq->ip; struct ipath_ibdev *dev = to_idev(srq->ibsrq.device); + u32 s = sizeof(struct ipath_rwq) + size * sz; - ip->obj = wq; - ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) + - size * sz); + ipath_update_mmap_info(dev, ip, s, wq); spin_lock_irq(&dev->pending_lock); - ip->next = dev->pending_mmaps; - dev->pending_mmaps = ip; + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); spin_unlock_irq(&dev->pending_lock); } } else if (attr_mask & IB_SRQ_LIMIT) { diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 30a825928fc..d8b5e4cefe2 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -31,8 +31,6 @@ * SOFTWARE. 
*/ -#include <linux/pci.h> - #include "ipath_kernel.h" struct infinipath_stats ipath_stats; @@ -207,7 +205,7 @@ void ipath_get_faststats(unsigned long opaque) * don't access the chip while running diags, or memory diags can * fail */ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) || + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) || ipath_diag_inuse) /* but re-arm the timer, for diags case; won't hurt other */ goto done; @@ -237,11 +235,13 @@ void ipath_get_faststats(unsigned long opaque) if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) && time_after(jiffies, dd->ipath_unmasktime)) { char ebuf[256]; - ipath_decode_err(ebuf, sizeof ebuf, + int iserr; + iserr = ipath_decode_err(ebuf, sizeof ebuf, (dd->ipath_maskederrs & ~dd-> ipath_ignorederrs)); if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) + ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_PKTERRS )) ipath_dev_err(dd, "Re-enabling masked errors " "(%s)\n", ebuf); else { @@ -252,8 +252,12 @@ void ipath_get_faststats(unsigned long opaque) * them. So only complain about these at debug * level. */ - ipath_dbg("Disabling frequent queue full errors " - "(%s)\n", ebuf); + if (iserr) + ipath_dbg("Re-enabling queue full errors (%s)\n", + ebuf); + else + ipath_cdbg(ERRPKT, "Re-enabling packet" + " problem interrupt (%s)\n", ebuf); } dd->ipath_maskederrs = dd->ipath_ignorederrs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index ffa6318ad0c..4dc398d5e01 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -32,7 +32,6 @@ */ #include <linux/ctype.h> -#include <linux/pci.h> #include "ipath_kernel.h" #include "ipath_common.h" diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 325d6634ff5..1c2b03c2ef5 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, { if (++qp->s_last == qp->s_size) qp->s_last = 0; - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc->wr_id = wqe->wr.wr_id; wc->status = IB_WC_SUCCESS; @@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, send_first: if (qp->r_reuse_sge) { qp->r_reuse_sge = 0; - qp->r_sge = qp->s_rdma_sge; + qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; goto done; } /* Save the WQE so we can reuse it in case of an error. */ - qp->s_rdma_sge = qp->r_sge; + qp->s_rdma_read_sge = qp->r_sge; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto send_last; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 9a3e54664ee..a518f7c8fa8 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) goto bail; } + if (wr->wr.ud.ah->pd != qp->ibqp.pd) { + ret = -EPERM; + goto bail; + } + /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) { ret = -EINVAL; @@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) done: /* Queue the completion status entry. 
*/ - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wr->send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wr->wr_id; wc.status = IB_WC_SUCCESS; @@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); + qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 2aaacdb7e52..12933e77c7e 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, struct ipath_mcast *mcast; struct ipath_mcast_qp *p; + if (lnh != IPATH_LRH_GRH) { + dev->n_pkt_drops++; + goto bail; + } mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); if (mcast == NULL) { dev->n_pkt_drops++; @@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, } dev->n_multicast_rcv++; list_for_each_entry_rcu(p, &mcast->qp_list, list) - ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, - tlen, p->qp); + ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); /* * Notify ipath_multicast_detach() if it is waiting for us * to finish. @@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, /* +1 is for the qword padding of pbc */ plen = hdrwords + ((len + 3) >> 2) + 1; if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { - ipath_dbg("packet len 0x%x too long, failing\n", plen); ret = -EINVAL; goto bail; } @@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_cqe = ib_ipath_max_cqes; props->max_mr = dev->lk_table.max; props->max_pd = ib_ipath_max_pds; - props->max_qp_rd_atom = 1; - props->max_qp_init_rd_atom = 1; + props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; + props->max_qp_init_rd_atom = 255; /* props->max_res_rd_atom */ props->max_srq = ib_ipath_max_srqs; props->max_srq_wr = ib_ipath_max_srq_wrs; props->max_srq_sge = ib_ipath_max_srq_sges; /* props->local_ca_ack_delay */ - props->atomic_cap = IB_ATOMIC_HCA; + props->atomic_cap = IB_ATOMIC_GLOB; props->max_pkeys = ipath_get_npkeys(dev->dd); props->max_mcast_grp = ib_ipath_max_mcast_grps; props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; @@ -1474,7 +1476,10 @@ int ipath_register_ib_device(struct ipath_devdata *dd) ret = -ENOMEM; goto err_lk; } + INIT_LIST_HEAD(&idev->pending_mmaps); spin_lock_init(&idev->pending_lock); + idev->mmap_offset = PAGE_SIZE; + spin_lock_init(&idev->mmap_offset_lock); INIT_LIST_HEAD(&idev->pending[0]); INIT_LIST_HEAD(&idev->pending[1]); INIT_LIST_HEAD(&idev->pending[2]); @@ -1556,8 +1561,8 @@ int ipath_register_ib_device(struct ipath_devdata *dd) (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; + dev->num_comp_vectors = 1; dev->dma_device = &dd->pcidev->dev; - dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; dev->query_port = ipath_query_port; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index c0c8d5b24a7..7064fc22272 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -40,9 +40,12 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <rdma/ib_pack.h> +#include 
<rdma/ib_user_verbs.h> #include "ipath_layer.h" +#define IPATH_MAX_RDMA_ATOMIC 4 + #define QPN_MAX (1 << 24) #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) @@ -89,7 +92,7 @@ struct ib_reth { } __attribute__ ((packed)); struct ib_atomic_eth { - __be64 vaddr; + __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ __be32 rkey; __be64 swap_data; __be64 compare_data; @@ -108,7 +111,7 @@ struct ipath_other_headers { } rc; struct { __be32 aeth; - __be64 atomic_ack_eth; + __be32 atomic_ack_eth[2]; } at; __be32 imm_data; __be32 aeth; @@ -170,12 +173,12 @@ struct ipath_ah { * this as its vm_private_data. */ struct ipath_mmap_info { - struct ipath_mmap_info *next; + struct list_head pending_mmaps; struct ib_ucontext *context; void *obj; + __u64 offset; struct kref ref; unsigned size; - unsigned mmap_cnt; }; /* @@ -186,7 +189,7 @@ struct ipath_mmap_info { struct ipath_cq_wc { u32 head; /* index of next entry to fill */ u32 tail; /* index of next ib_poll_cq() entry */ - struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ }; /* @@ -312,6 +315,19 @@ struct ipath_sge_state { }; /* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct ipath_ack_entry { + u8 opcode; + u32 psn; + union { + struct ipath_sge_state rdma_sge; + u64 atomic_data; + }; +}; + +/* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). * Variables prefixed with ack_ are for responder replies. @@ -333,24 +349,24 @@ struct ipath_qp { struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; struct ipath_sge_state s_sge; /* current send request data */ - /* current RDMA read send data */ - struct ipath_sge_state s_rdma_sge; + struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; + struct ipath_sge_state s_ack_rdma_sge; + struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state r_sge; /* current receive data */ spinlock_t s_lock; - unsigned long s_flags; + unsigned long s_busy; u32 s_hdrwords; /* size of s_hdr in 32 bit words */ u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ - u32 s_rdma_len; /* total length of s_rdma_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ u32 s_psn; /* current packet sequence number */ - u32 s_ack_psn; /* PSN for RDMA_READ */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u64 r_wr_id; /* ID for current receive WQE */ - u64 r_atomic_data; /* data for last atomic op */ - u32 r_atomic_psn; /* PSN of last atomic op */ u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_psn; /* expected rcv packet sequence number */ @@ -360,12 +376,13 @@ struct ipath_qp { u8 s_ack_state; /* opcode of packet to ACK */ u8 s_nak_state; /* non-zero if NAK is pending */ u8 r_state; /* opcode of last packet received */ - u8 r_ack_state; /* opcode of packet to ACK */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 r_reuse_sge; /* for UC receive errors */ u8 r_sge_inx; /* current index into sg_list */ u8 r_wrid_valid; /* 
r_wrid set but CQ entry not yet made */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 qp_access_flags; u8 s_max_sge; /* size of s_wq->sg_list */ u8 s_retry_cnt; /* number of times to retry */ @@ -374,6 +391,10 @@ struct ipath_qp { u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + u8 s_flags; u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; @@ -390,13 +411,18 @@ struct ipath_qp { struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; +/* Bit definition for s_busy. */ +#define IPATH_S_BUSY 0 + /* * Bit definitions for s_flags. */ -#define IPATH_S_BUSY 0 -#define IPATH_S_SIGNAL_REQ_WR 1 +#define IPATH_S_SIGNAL_REQ_WR 0x01 +#define IPATH_S_FENCE_PENDING 0x02 +#define IPATH_S_RDMAR_PENDING 0x04 +#define IPATH_S_ACK_PENDING 0x08 -#define IPATH_PSN_CREDIT 2048 +#define IPATH_PSN_CREDIT 512 /* * Since struct ipath_swqe is not a fixed size, we can't simply index into @@ -459,9 +485,10 @@ struct ipath_opcode_stats { struct ipath_ibdev { struct ib_device ibdev; - struct list_head dev_list; struct ipath_devdata *dd; - struct ipath_mmap_info *pending_mmaps; + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; + u32 mmap_offset; int ib_unit; /* This is the device number */ u16 sm_lid; /* in host order */ u8 sm_sl; @@ -706,17 +733,15 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int ipath_destroy_srq(struct ib_srq *ibsrq); -void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); - int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, struct ib_ucontext *context, struct ib_udata *udata); int ipath_destroy_cq(struct ib_cq *ibcq); -int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); +int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); @@ -744,6 +769,15 @@ int ipath_dealloc_fmr(struct ib_fmr *ibfmr); void ipath_release_mmap_info(struct kref *ref); +struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj); + +void ipath_update_mmap_info(struct ipath_ibdev *dev, + struct ipath_mmap_info *ip, + u32 size, void *obj); + int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); @@ -757,9 +791,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, void ipath_do_ruc_send(unsigned long data); -u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr, - u32 pmtu); - int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, u32 pmtu, u32 *bth0p, u32 *bth2p); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index efd79ef109a..cf0868f6e96 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -726,11 +726,12 @@ repoll: return err == 0 || err == -EAGAIN ? 
npolled : err; } -int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { __be32 doorbell[2]; - doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ? + doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : MTHCA_TAVOR_CQ_DB_REQ_NOT) | to_mcq(cq)->cqn); @@ -743,7 +744,7 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) return 0; } -int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct mthca_cq *cq = to_mcq(ibcq); __be32 doorbell[2]; @@ -755,7 +756,8 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) doorbell[0] = ci; doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | - (notify == IB_CQ_SOLICITED ? 1 : 2)); + ((flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? 1 : 2)); mthca_write_db_rec(doorbell, cq->arm_db); @@ -766,7 +768,7 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) wmb(); doorbell[0] = cpu_to_be32((sn << 28) | - (notify == IB_CQ_SOLICITED ? + ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index b7e42efaf43..9bae3cc6060 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -495,8 +495,8 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev); int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); -int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); +int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); int mthca_init_cq(struct mthca_dev *dev, int nent, struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 0d9b7d06bbc..773145e2994 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1013,14 +1013,14 @@ static struct { u64 latest_fw; u32 flags; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0), + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0), .flags = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600), + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), .flags = MTHCA_FLAG_PCIE }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400), + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, - [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 1, 0), + [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE | MTHCA_FLAG_SINAI_OPT } @@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_cmd; if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { - mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", + mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[hca_type].latest_fw >> 32), diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h 
b/drivers/infiniband/hw/mthca/mthca_memfree.h index 594144145f4..a1ab06847b7 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -38,7 +38,6 @@ #define MTHCA_MEMFREE_H #include <linux/list.h> -#include <linux/pci.h> #include <linux/mutex.h> #define MTHCA_ICM_CHUNK_LEN \ diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 8e4846b5c64..aa6c70a6a36 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -297,7 +297,8 @@ out: int mthca_write_mtt_size(struct mthca_dev *dev) { - if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) /* * Be friendly to WRITE_MTT command * and leave two empty slots for the @@ -355,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, int size = mthca_write_mtt_size(dev); int chunk; - if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); while (list_len > 0) { @@ -835,6 +837,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) key = arbel_key_to_hw_index(fmr->ibmr.lkey); key &= dev->limits.num_mpts - 1; + key = adjust_key(dev, key); fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); fmr->maps = 0; @@ -881,8 +884,8 @@ int mthca_init_mr_table(struct mthca_dev *dev) } mpts = mtts = 1 << i; } else { - mpts = dev->limits.num_mtt_segs; - mtts = dev->limits.num_mpts; + mtts = dev->limits.num_mtt_segs; + mpts = dev->limits.num_mpts; } if (!mthca_is_memfree(dev) && diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 0725ad7ad9b..1c05486c3c6 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -663,6 +663,7 @@ static int mthca_destroy_qp(struct ib_qp *qp) } static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, struct ib_ucontext *context, struct ib_udata *udata) { @@ -1292,8 +1293,8 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; + dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dma_device = &dev->pdev->dev; - dev->ib_dev.class_dev.dev = &dev->pdev->dev; dev->ib_dev.query_device = mthca_query_device; dev->ib_dev.query_port = mthca_query_port; dev->ib_dev.modify_device = mthca_modify_device; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 1c6b63aca26..fee60c852d1 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -701,6 +701,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); } + if (ibqp->qp_type == IB_QPT_RC && + cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + u8 sched_queue = ibqp->uobject ? 
0x2 : 0x1; + + if (mthca_is_memfree(dev)) + qp_context->rlkey_arbel_sched_queue |= sched_queue; + else + qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue); + + qp_param->opt_param_mask |= + cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE); + } + if (attr_mask & IB_QP_TIMEOUT) { qp_context->pri_path.ackto = attr->timeout << 3; qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); @@ -1419,11 +1432,10 @@ void mthca_free_qp(struct mthca_dev *dev, * unref the mem-free tables and free the QPN in our table. */ if (!qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, + mthca_cq_clean(dev, recv_cq, qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); - if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, - qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + mthca_cq_clean(dev, send_cq, qp->qpn, NULL); mthca_free_memfree(dev, qp); mthca_free_wqe_buf(dev, qp);
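
Note on the ib_atomic_eth change above: vaddr becomes two __be32 words because the 64-bit virtual address in the ATOMIC ETH header is not guaranteed to be 8-byte aligned in the received buffer. A minimal sketch of how the address can be reassembled from the two words (the helper name is hypothetical, not part of this patch):

static inline u64 ib_ateth_get_vaddr(const struct ib_atomic_eth *ateth)
{
	/* vaddr[0] carries the most significant 32 bits on the wire */
	return ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
		be32_to_cpu(ateth->vaddr[1]);
}

The same two-word access pattern applies to the atomic_ack_eth field in ipath_other_headers.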
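
Note on the new s_ack_queue[] in struct ipath_qp: together with r_head_ack_queue and s_tail_ack_queue it lets the responder keep up to IPATH_MAX_RDMA_ATOMIC RDMA read/atomic responses pending, instead of the single r_atomic_data/r_atomic_psn pair it replaces. A simplified sketch of the intended ring indexing, assuming a hypothetical enqueue helper and omitting locking, duplicate-PSN handling, and the RDMA read case (which would set up the rdma_sge member of the union rather than atomic_data):

static int ipath_queue_resp_sketch(struct ipath_qp *qp, u8 opcode,
				   u32 psn, u64 atomic_data)
{
	/* one spare slot distinguishes full from empty (head == tail) */
	unsigned next = qp->r_head_ack_queue + 1;
	struct ipath_ack_entry *e;

	if (next > IPATH_MAX_RDMA_ATOMIC)
		next = 0;
	if (next == qp->s_tail_ack_queue)
		return -ENOMEM;	/* responder resources exhausted */

	e = &qp->s_ack_queue[qp->r_head_ack_queue];
	e->opcode = opcode;
	e->psn = psn;
	e->atomic_data = atomic_data;
	qp->r_head_ack_queue = next;
	return 0;
}

The send side then drains entries from s_tail_ack_queue, numbering the packets with s_ack_rdma_psn and s_ack_psn as the field comments above describe.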
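
Note on the ib_cq_notify -> ib_cq_notify_flags conversion visible in the ipath and mthca arm/notify paths above: the requested arm mode is now extracted with IB_CQ_SOLICITED_MASK, and when the consumer also sets IB_CQ_REPORT_MISSED_EVENTS the driver may return a positive value if completions could have been missed while re-arming. A sketch of the consumer-side pattern this enables (assumed caller code, not part of this patch; handle_completion() is a hypothetical consumer hook):

static void drain_and_rearm(struct ib_cq *cq)
{
	struct ib_wc wc;

	do {
		while (ib_poll_cq(cq, 1, &wc) > 0)
			handle_completion(&wc);
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
}

Without the flag, a consumer has to poll once more after every re-arm to close the race; with it, the extra poll is only needed when the driver reports that the CQ may not be empty.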