Diffstat (limited to 'drivers/infiniband')
55 files changed, 1237 insertions, 701 deletions
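The first hunk below (core/umem.c) threads a new dmasync argument from ib_umem_get() down to the DMA mapping layer; every in-tree caller gains a trailing 0 except the mlx4 CQ buffer, which passes 1 so that data written by DMA is globally visible before the corresponding CQE can be polled (this matters on platforms such as SGI Altix that can reorder DMA writes). A minimal sketch of the resulting mapping pattern, assuming the dma-attrs API of this kernel generation (DEFINE_DMA_ATTRS, dma_set_attr, ib_dma_map_sg_attrs); the helper name is hypothetical:

#include <linux/dma-attrs.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/*
 * Hypothetical helper mirroring the new ib_umem_get() path: dmasync=1
 * requests DMA_ATTR_WRITE_BARRIER on the mapping, dmasync=0 leaves the
 * attribute set empty so behavior is unchanged for existing callers.
 */
static int map_umem_chunk(struct ib_device *dev, struct scatterlist *sg,
			  int nents, int dmasync)
{
	DEFINE_DMA_ATTRS(attrs);	/* no attributes set initially */

	if (dmasync)
		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);

	/* Architectures without dma-attrs support simply ignore attrs. */
	return ib_dma_map_sg_attrs(dev, sg, nents, DMA_BIDIRECTIONAL, &attrs);
}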
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 4e3128ff73c..fe78f7d2509 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -38,6 +38,7 @@ #include <linux/dma-mapping.h> #include <linux/sched.h> #include <linux/hugetlb.h> +#include <linux/dma-attrs.h> #include "uverbs.h" @@ -72,9 +73,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned + * @dmasync: flush in-flight DMA when the memory region is written */ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, - size_t size, int access) + size_t size, int access, int dmasync) { struct ib_umem *umem; struct page **page_list; @@ -87,6 +89,10 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, int ret; int off; int i; + DEFINE_DMA_ATTRS(attrs); + + if (dmasync) + dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); if (!can_do_mlock()) return ERR_PTR(-EPERM); @@ -174,10 +180,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0); } - chunk->nmap = ib_dma_map_sg(context->device, - &chunk->page_list[0], - chunk->nents, - DMA_BIDIRECTIONAL); + chunk->nmap = ib_dma_map_sg_attrs(context->device, + &chunk->page_list[0], + chunk->nents, + DMA_BIDIRECTIONAL, + &attrs); if (chunk->nmap <= 0) { for (i = 0; i < chunk->nents; ++i) put_page(sg_page(&chunk->page_list[i])); diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 6af2c0f79a6..2acf9b62cf9 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -452,7 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-ENOMEM); c2mr->pd = c2pd; - c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc); + c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(c2mr->umem)) { err = PTR_ERR(c2mr->umem); kfree(c2mr); diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 66eb7030aea..5fd8506a865 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -359,9 +359,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) cq->sw_wptr++; } -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) { u32 ptr; + int flushed = 0; PDBG("%s wq %p cq %p\n", __func__, wq, cq); @@ -369,8 +370,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, wq->rq_rptr, wq->rq_wptr, count); ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) + while (ptr++ != wq->rq_wptr) { insert_recv_cqe(wq, cq); + flushed++; + } + return flushed; } static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, @@ -394,9 +398,10 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, cq->sw_wptr++; } -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) { __u32 ptr; + int flushed = 0; struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); ptr = wq->sq_rptr + count; @@ -405,7 +410,9 @@ void cxio_flush_sq(struct t3_wq *wq, struct t3_cq 
*cq, int count) insert_sq_cqe(wq, cq, sqp); sqp++; ptr++; + flushed++; } + return flushed; } /* @@ -456,7 +463,8 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) ptr = cq->sw_rptr; while (!Q_EMPTY(ptr, cq->sw_wptr)) { cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) && + if ((SQ_TYPE(*cqe) || + ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && (CQE_QPID(*cqe) == wq->qpid)) (*count)++; ptr++; @@ -829,7 +837,8 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) wqe->mpaattrs = attr->mpaattrs; wqe->qpcaps = attr->qpcaps; wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->flags = cpu_to_be32(attr->flags); + wqe->rqe_count = cpu_to_be16(attr->rqe_count); + wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type)); wqe->ord = cpu_to_be32(attr->ord); wqe->ird = cpu_to_be32(attr->ird); wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); @@ -1135,6 +1144,18 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { /* + * If this is an unsolicited read response, then the read + * was generated by the kernel driver as part of peer-2-peer + * connection setup. So ignore the completion. + */ + if (!wq->oldest_read) { + if (CQE_STATUS(*hw_cqe)) + wq->error = 1; + ret = -1; + goto skip_cqe; + } + + /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. */ diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 99543d63470..69ab08ebc68 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -53,6 +53,7 @@ #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 #define T3_MAX_NUM_STAG (1<<15) +#define T3_MAX_MR_SIZE 0x100000000ULL #define T3_STAG_UNSET 0xffffffff @@ -172,8 +173,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); int __init cxio_hal_init(void); void __exit cxio_hal_exit(void); -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_flush_hw_cq(struct t3_cq *cq); diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index 969d4d92845..f1a25a821a4 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -278,6 +278,17 @@ enum t3_qp_caps { uP_RI_QP_STAG0_ENABLE = 0x10 } __attribute__ ((packed)); +enum rdma_init_rtr_types { + RTR_READ = 1, + RTR_WRITE = 2, + RTR_SEND = 3, +}; + +#define S_RTR_TYPE 2 +#define M_RTR_TYPE 0x3 +#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) +#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) + struct t3_rdma_init_attr { u32 tid; u32 qpid; @@ -293,7 +304,9 @@ struct t3_rdma_init_attr { u32 ird; u64 qp_dma_addr; u32 qp_dma_size; - u32 flags; + enum rdma_init_rtr_types rtr_type; + u16 flags; + u16 rqe_count; u32 irs; }; @@ -309,8 +322,8 @@ struct t3_rdma_init_wr { u8 mpaattrs; /* 5 */ u8 qpcaps; __be16 ulpdu_size; - __be32 flags; /* bits 31-1 - reservered */ - /* bit 0 - set if RECV posted */ + __be16 flags_rtr_type; + __be16 
rqe_count; __be32 ord; /* 6 */ __be32 ird; __be64 qp_dma_addr; /* 7 */ @@ -324,7 +337,7 @@ struct t3_genbit { }; enum rdma_init_wr_flags { - RECVS_POSTED = (1<<0), + MPA_INITIATOR = (1<<0), PRIV_QP = (1<<1), }; diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 6ba4138c8ec..71554eacb13 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -83,6 +83,7 @@ static void rnic_init(struct iwch_dev *rnicp) rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ + rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; rnicp->attr.can_resize_wq = 0; rnicp->attr.max_rdma_reads_per_qp = 8; rnicp->attr.max_rdma_read_resources = diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 9ad9b1e7c8c..d2409a505e8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -66,6 +66,7 @@ struct iwch_rnic_attributes { * size (4k)^i. Phys block list mode unsupported. */ u32 mem_pgsizes_bitmask; + u64 max_mr_size; u8 can_resize_wq; /* diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 72ca360c3db..c325c44807e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -63,10 +63,14 @@ static char *states[] = { NULL, }; -static int ep_timeout_secs = 10; +int peer2peer = 0; +module_param(peer2peer, int, 0644); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); + +static int ep_timeout_secs = 60; module_param(ep_timeout_secs, int, 0644); MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=10)"); + "in seconds (default=60)"); static int mpa_rev = 1; module_param(mpa_rev, int, 0644); @@ -125,6 +129,12 @@ static void start_ep_timer(struct iwch_ep *ep) static void stop_ep_timer(struct iwch_ep *ep) { PDBG("%s ep %p\n", __func__, ep); + if (!timer_pending(&ep->timer)) { + printk(KERN_ERR "%s timer stopped when its not running! 
ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + return; + } del_timer_sync(&ep->timer); put_ep(&ep->com); } @@ -508,7 +518,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | @@ -559,7 +569,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) set_arp_failure_handler(skb, arp_failure_discard); skb_reset_transport_header(skb); req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(mpalen); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | @@ -611,7 +621,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | @@ -879,6 +889,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) * the MPA header is valid. */ state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.initiator = 1; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -901,8 +912,14 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) /* bind QP and TID with INIT_WR */ err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); - if (!err) - goto out; + if (err) + goto err; + + if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { + iwch_post_zb_read(ep->com.qp); + } + + goto out; err: abort_connection(ep, skb, GFP_KERNEL); out: @@ -995,6 +1012,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) * If we get here we have accumulated the entire mpa * start reply message including private data. */ + ep->mpa_attr.initiator = 0; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; @@ -1065,17 +1083,33 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p credits %u\n", __func__, ep, credits); - if (credits == 0) + if (credits == 0) { + PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); return CPL_RET_BUF_DONE; + } + BUG_ON(credits != 1); - BUG_ON(ep->mpa_skb == NULL); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; dst_confirm(ep->dst); - if (state_read(&ep->com) == MPA_REP_SENT) { - ep->com.rpl_done = 1; - PDBG("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); + if (!ep->mpa_skb) { + PDBG("%s rdma_init wr_ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); + if (ep->mpa_attr.initiator) { + PDBG("%s initiator ep %p state %u\n", + __func__, ep, state_read(&ep->com)); + if (peer2peer) + iwch_post_zb_read(ep->com.qp); + } else { + PDBG("%s responder ep %p state %u\n", + __func__, ep, state_read(&ep->com)); + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + } + } else { + PDBG("%s lsm ack ep %p state %u freeing skb\n", + __func__, ep, state_read(&ep->com)); + kfree_skb(ep->mpa_skb); + ep->mpa_skb = NULL; } return CPL_RET_BUF_DONE; } @@ -1083,8 +1117,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; + unsigned long flags; + int release = 0; PDBG("%s ep %p\n", __func__, ep); + BUG_ON(!ep); /* * We get 2 abort replies from the HW. The first one must @@ -1095,9 +1132,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - close_complete_upcall(ep); - state_set(&ep->com, DEAD); - release_ep_resources(ep); + spin_lock_irqsave(&ep->com.lock, flags); + switch (ep->com.state) { + case ABORTING: + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + printk(KERN_ERR "%s ep %p state %d\n", + __func__, ep, ep->com.state); + break; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + + if (release) + release_ep_resources(ep); return CPL_RET_BUF_DONE; } @@ -1470,7 +1520,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct sk_buff *rpl_skb; struct iwch_qp_attributes attrs; int ret; - int state; + int release = 0; + unsigned long flags; if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, @@ -1488,9 +1539,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - state = state_read(&ep->com); - PDBG("%s ep %p state %u\n", __func__, ep, state); - switch (state) { + spin_lock_irqsave(&ep->com.lock, flags); + PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state); + switch (ep->com.state) { case CONNECTING: break; case MPA_REQ_WAIT: @@ -1536,21 +1587,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) break; case DEAD: PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + spin_unlock_irqrestore(&ep->com.lock, flags); return CPL_RET_BUF_DONE; default: BUG_ON(1); break; } dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + release = 1; + } + spin_unlock_irqrestore(&ep->com.lock, flags); rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); if (!rpl_skb) { printk(KERN_ERR MOD "%s - cannot allocate skb!\n", __func__); - dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; + release = 1; + goto out; } rpl_skb->priority = 
CPL_PRIORITY_DATA; rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); @@ -1559,10 +1614,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); rpl->cmd = CPL_ABORT_NO_RST; cxgb3_ofld_send(ep->com.tdev, rpl_skb); - if (state != ABORTING) { - state_set(&ep->com, DEAD); +out: + if (release) release_ep_resources(ep); - } return CPL_RET_BUF_DONE; } @@ -1596,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) release = 1; break; case ABORTING: - break; case DEAD: + break; default: BUG_ON(1); break; @@ -1661,15 +1715,18 @@ static void ep_timeout(unsigned long arg) struct iwch_ep *ep = (struct iwch_ep *)arg; struct iwch_qp_attributes attrs; unsigned long flags; + int abort = 1; spin_lock_irqsave(&ep->com.lock, flags); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); switch (ep->com.state) { case MPA_REQ_SENT: + __state_set(&ep->com, ABORTING); connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: + __state_set(&ep->com, ABORTING); break; case CLOSING: case MORIBUND: @@ -1679,13 +1736,17 @@ static void ep_timeout(unsigned long arg) ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); } + __state_set(&ep->com, ABORTING); break; default: - BUG(); + printk(KERN_ERR "%s unexpected state ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + abort = 0; } - __state_set(&ep->com, CLOSING); spin_unlock_irqrestore(&ep->com.lock, flags); - abort_connection(ep, NULL, GFP_ATOMIC); + if (abort) + abort_connection(ep, NULL, GFP_ATOMIC); put_ep(&ep->com); } @@ -1762,16 +1823,19 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (err) goto err; + /* if needed, wait for wr_ack */ + if (iwch_rqes_posted(qp)) { + wait_event(ep->com.waitq, ep->com.rpl_done); + err = ep->com.rpl_err; + if (err) + goto err; + } + err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) goto err; - /* wait for wr_ack */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (err) - goto err; state_set(&ep->com, FPDU_MODE); established_upcall(ep); @@ -1968,40 +2032,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); - if (ep->com.state == DEAD) { - PDBG("%s already dead ep %p\n", __func__, ep); - goto out; - } - - if (abrupt) { - if (ep->com.state != ABORTING) { - ep->com.state = ABORTING; - close = 1; - } - goto out; - } - switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: - start_ep_timer(ep); - ep->com.state = CLOSING; close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + start_ep_timer(ep); + } break; case CLOSING: - ep->com.state = MORIBUND; close = 1; + if (abrupt) { + stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; break; case MORIBUND: + case ABORTING: + case DEAD: + PDBG("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); break; default: BUG(); break; } -out: + spin_unlock_irqrestore(&ep->com.lock, flags); if (close) { if (abrupt) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 2bb7fbdb3ff..d7c7e09f099 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -56,6 +56,7 @@ #define put_ep(ep) { \ 
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ ep, atomic_read(&((ep)->kref.refcount))); \ + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ kref_put(&((ep)->kref), __free_ep); \ } @@ -225,5 +226,6 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, st int __init iwch_cm_init(void); void __exit iwch_cm_term(void); +extern int peer2peer; #endif /* _IWCH_CM_H_ */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ab4695c1dd5..d07d3a377b5 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -602,7 +602,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); - mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc); + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); kfree(mhp); @@ -998,7 +998,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->device_cap_flags = dev->device_cap_flags; props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = ~0ull; + props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; props->max_sge = dev->attr.max_sge_per_wr; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 61356f91109..db5100d27ca 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -118,6 +118,7 @@ enum IWCH_QP_FLAGS { }; struct iwch_mpa_attributes { + u8 initiator; u8 recv_marker_enabled; u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. 
*/ u8 crc_enabled; @@ -322,6 +323,7 @@ enum iwch_qp_query_flags { IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ }; +u16 iwch_rqes_posted(struct iwch_qp *qhp); int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, @@ -331,6 +333,7 @@ int iwch_bind_mw(struct ib_qp *qp, struct ib_mw_bind *mw_bind); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); +int iwch_post_zb_read(struct iwch_qp *qhp); int iwch_register_device(struct iwch_dev *dev); void iwch_unregister_device(struct iwch_dev *dev); int iwch_quiesce_qps(struct iwch_cq *chp); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 8891c3b0a3d..79dbe5beae5 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -586,6 +586,36 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg, } } +int iwch_post_zb_read(struct iwch_qp *qhp) +{ + union t3_wr *wqe; + struct sk_buff *skb; + u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; + + PDBG("%s enter\n", __func__); + skb = alloc_skb(40, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR "%s cannot send zb_read!!\n", __func__); + return -ENOMEM; + } + wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr)); + memset(wqe, 0, sizeof(struct t3_rdma_read_wr)); + wqe->read.rdmaop = T3_READ_REQ; + wqe->read.reserved[0] = 0; + wqe->read.reserved[1] = 0; + wqe->read.reserved[2] = 0; + wqe->read.rem_stag = cpu_to_be32(1); + wqe->read.rem_to = cpu_to_be64(1); + wqe->read.local_stag = cpu_to_be32(1); + wqe->read.local_len = cpu_to_be32(0); + wqe->read.local_to = cpu_to_be64(1); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)| + V_FW_RIWR_LEN(flit_cnt)); + skb->priority = CPL_PRIORITY_DATA; + return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); +} + /* * This posts a TERMINATE with layer=RDMA, type=catastrophic. */ @@ -625,6 +655,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) { struct iwch_cq *rchp, *schp; int count; + int flushed; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); @@ -639,20 +670,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) spin_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - cxio_flush_rq(&qhp->wq, &rchp->cq, count); + flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, *flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + if (flushed) + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); /* locking heirarchy: cq lock first, then qp lock. 
*/ spin_lock_irqsave(&schp->lock, *flag); spin_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); cxio_count_scqes(&schp->cq, &qhp->wq, &count); - cxio_flush_sq(&qhp->wq, &schp->cq, count); + flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, *flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + if (flushed) + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) @@ -671,11 +704,18 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) /* - * Return non zero if at least one RECV was pre-posted. + * Return count of RECV WRs posted */ -static int rqes_posted(struct iwch_qp *qhp) +u16 iwch_rqes_posted(struct iwch_qp *qhp) { - return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV; + union t3_wr *wqe = qhp->wq.queue; + u16 count = 0; + while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { + count++; + wqe++; + } + PDBG("%s qhp %p count %u\n", __func__, qhp, count); + return count; } static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, @@ -716,8 +756,17 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.ird = qhp->attr.max_ird; init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.rqe_count = iwch_rqes_posted(qhp); + init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0; + if (peer2peer) { + init_attr.rtr_type = RTR_READ; + if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) + init_attr.ord = 1; + if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) + init_attr.ird = 1; + } else + init_attr.rtr_type = 0; init_attr.irs = qhp->ep->rcv_seq; PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " "flags 0x%x qpcaps 0x%x\n", __func__, @@ -832,8 +881,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=0; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } - flush_qp(qhp, &flag); break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; @@ -848,6 +897,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=1; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } goto err; break; @@ -863,6 +913,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, } switch (attrs->next_state) { case IWCH_QP_STATE_IDLE: + flush_qp(qhp, &flag); qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.llp_stream_handle = NULL; put_ep(&qhp->ep->com); @@ -929,8 +980,10 @@ out: * on the EP. This can be a normal close (RTS->CLOSING) or * an abnormal close (RTS/CLOSING->ERROR). 
*/ - if (disconnect) + if (disconnect) { iwch_ep_disconnect(ep, abort, GFP_KERNEL); + put_ep(&ep->com); + } /* * If free is 1, then we've disassociated the EP from the QP diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 0d13fe0a260..00bab60f6de 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -66,6 +66,7 @@ struct ehca_av; #include "ehca_irq.h" #define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff struct ehca_eqe_cache_entry { struct ehca_eqe *eqe; @@ -127,6 +128,8 @@ struct ehca_shca { /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ u32 hca_cap_mr_pgsize; int max_mtu; + atomic_t num_cqs; + atomic_t num_qps; }; struct ehca_pd { @@ -160,6 +163,7 @@ struct ehca_qp { }; u32 qp_type; enum ehca_ext_qp_type ext_type; + enum ib_qp_state state; struct ipz_queue ipz_squeue; struct ipz_queue ipz_rqueue; struct h_galpas galpas; @@ -343,6 +347,8 @@ extern int ehca_use_hp_mr; extern int ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; struct ipzu_queue_resp { u32 qe_size; /* queue entry size */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index ec0cfcf3073..5540b276a33 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -132,10 +132,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); + if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", ehca_max_cq); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); if (!my_cq) { ehca_err(device, "Out of memory for ehca_cq struct device=%p", device); + atomic_dec(&shca->num_cqs); return ERR_PTR(-ENOMEM); } @@ -305,6 +314,7 @@ create_cq_exit2: create_cq_exit1: kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); return cq; } @@ -359,6 +369,7 @@ int ehca_destroy_cq(struct ib_cq *cq) ipz_queue_dtor(NULL, &my_cq->ipz_queue); kmem_cache_free(cq_cache, my_cq); + atomic_dec(&shca->num_cqs); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index b4ac617a70e..49660dfa186 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -54,7 +54,8 @@ int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, const enum ehca_eq_type type, const u32 length) { - u64 ret; + int ret; + u64 h_ret; u32 nr_pages; u32 i; void *vpage; @@ -73,15 +74,15 @@ int ehca_create_eq(struct ehca_shca *shca, return -EINVAL; } - ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, - &eq->pf, - type, - length, - &eq->ipz_eq_handle, - &eq->length, - &nr_pages, &eq->ist); + h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); - if (ret != H_SUCCESS) { + if (h_ret != H_SUCCESS) { ehca_err(ib_dev, "Can't allocate EQ/NEQ. 
eq=%p", eq); return -EINVAL; } @@ -97,24 +98,22 @@ int ehca_create_eq(struct ehca_shca *shca, u64 rpage; vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (!vpage) { - ret = H_RESOURCE; + if (!vpage) goto create_eq_exit2; - } rpage = virt_to_abs(vpage); - ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, - eq->ipz_eq_handle, - &eq->pf, - 0, 0, rpage, 1); + h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); if (i == (nr_pages - 1)) { /* last page */ vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (ret != H_SUCCESS || vpage) + if (h_ret != H_SUCCESS || vpage) goto create_eq_exit2; } else { - if (ret != H_PAGE_REGISTERED || !vpage) + if (h_ret != H_PAGE_REGISTERED || !vpage) goto create_eq_exit2; } } diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 2515cbde7e6..bc3b37d2070 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) props->max_ee = limit_uint(rblock->max_rd_ee_context); props->max_rdd = limit_uint(rblock->max_rd_domain); props->max_fmr = limit_uint(rblock->max_mr); - props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay); props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); @@ -115,7 +114,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) } props->max_pkeys = 16; - props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay); + props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255); props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); @@ -136,7 +135,7 @@ query_device1: return ret; } -static int map_mtu(struct ehca_shca *shca, u32 fw_mtu) +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) { switch (fw_mtu) { case 0x1: @@ -156,7 +155,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu) } } -static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) { switch (vl_cap) { case 0x1: diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index b5ca94c6b8d..ca5eb0cb628 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -633,7 +633,7 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool) unsigned long flags; WARN_ON_ONCE(!in_interrupt()); - if (ehca_debug_level) + if (ehca_debug_level >= 3) ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 65b3362cdb9..482103eb6ea 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -50,7 +50,7 @@ #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0025" +#define HCAD_VERSION "0026" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); @@ -60,7 +60,6 @@ MODULE_VERSION(HCAD_VERSION); static int ehca_open_aqp1 = 0; static int ehca_hw_level = 0; static int ehca_poll_all_eqs = 1; -static int ehca_mr_largepage = 1; int ehca_debug_level = 0; int ehca_nr_ports = 2; @@ -69,47 +68,52 @@ int 
ehca_port_act_time = 30; int ehca_static_rate = -1; int ehca_scaling_code = 0; int ehca_lock_hcalls = -1; - -module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO); -module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); -module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); -module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); -module_param_named(use_hp_mr, ehca_use_hp_mr, int, S_IRUGO); -module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); -module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, S_IRUGO); -module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); -module_param_named(scaling_code, ehca_scaling_code, int, S_IRUGO); -module_param_named(mr_largepage, ehca_mr_largepage, int, S_IRUGO); +int ehca_max_cq = -1; +int ehca_max_qp = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); +module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); +module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); +module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); +module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); +module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); +module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); +module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); +module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); MODULE_PARM_DESC(open_aqp1, - "AQP1 on startup (0: no (default), 1: yes)"); + "Open AQP1 on startup (default: no)"); MODULE_PARM_DESC(debug_level, - "debug level" - " (0: no debug traces (default), 1: with debug traces)"); + "Amount of debug output (0: none (default), 1: traces, " + "2: some dumps, 3: lots)"); MODULE_PARM_DESC(hw_level, - "hardware level" - " (0: autosensing (default), 1: v. 0.20, 2: v. 
0.21)"); + "Hardware level (0: autosensing (default), " + "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); MODULE_PARM_DESC(nr_ports, "number of connected ports (-1: autodetect, 1: port one only, " "2: two ports (default)"); MODULE_PARM_DESC(use_hp_mr, - "high performance MRs (0: no (default), 1: yes)"); + "Use high performance MRs (default: no)"); MODULE_PARM_DESC(port_act_time, - "time to wait for port activation (default: 30 sec)"); + "Time to wait for port activation (default: 30 sec)"); MODULE_PARM_DESC(poll_all_eqs, - "polls all event queues periodically" - " (0: no, 1: yes (default))"); + "Poll all event queues periodically (default: yes)"); MODULE_PARM_DESC(static_rate, - "set permanent static rate (default: disabled)"); + "Set permanent static rate (default: no static rate)"); MODULE_PARM_DESC(scaling_code, - "set scaling code (0: disabled/default, 1: enabled)"); -MODULE_PARM_DESC(mr_largepage, - "use large page for MR (0: use PAGE_SIZE (default), " - "1: use large page depending on MR size"); + "Enable scaling code (default: no)"); MODULE_PARM_DESC(lock_hcalls, - "serialize all hCalls made by the driver " + "Serialize all hCalls made by the driver " "(default: autodetect)"); +MODULE_PARM_DESC(number_of_cqs, + "Max number of CQs which can be allocated " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_qps, + "Max number of QPs which can be allocated " + "(default: autodetect)"); DEFINE_RWLOCK(ehca_qp_idr_lock); DEFINE_RWLOCK(ehca_cq_idr_lock); @@ -275,6 +279,7 @@ static int ehca_sense_attributes(struct ehca_shca *shca) u64 h_ret; struct hipz_query_hca *rblock; struct hipz_query_port *port; + const char *loc_code; static const u32 pgsize_map[] = { HCA_CAP_MR_PGSIZE_4K, 0x1000, @@ -283,6 +288,12 @@ static int ehca_sense_attributes(struct ehca_shca *shca) HCA_CAP_MR_PGSIZE_16M, 0x1000000, }; + ehca_gen_dbg("Probing adapter %s...", + shca->ofdev->node->full_name); + loc_code = of_get_property(shca->ofdev->node, "ibm,loc-code", NULL); + if (loc_code) + ehca_gen_dbg(" ... 
location lode=%s", loc_code); + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_gen_err("Cannot allocate rblock memory."); @@ -350,10 +361,27 @@ static int ehca_sense_attributes(struct ehca_shca *shca) /* translate supported MR page sizes; always support 4K */ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; - if (ehca_mr_largepage) { /* support extra sizes only if enabled */ - for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) - if (rblock->memory_page_size_supported & pgsize_map[i]) - shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; + for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) + if (rblock->memory_page_size_supported & pgsize_map[i]) + shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; + + /* Set maximum number of CQs and QPs to calculate EQ size */ + if (ehca_max_qp == -1) + ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES); + else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) { + ehca_gen_err("Requested number of QPs is out of range (1 - %i) " + "specified by HW", rblock->max_qp); + ret = -EINVAL; + goto sense_attributes1; + } + + if (ehca_max_cq == -1) + ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES); + else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) { + ehca_gen_err("Requested number of CQs is out of range (1 - %i) " + "specified by HW", rblock->max_cq); + ret = -EINVAL; + goto sense_attributes1; } /* query max MTU from first port -- it's the same for all ports */ @@ -567,8 +595,7 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport) static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", - ehca_debug_level); + return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); } static ssize_t ehca_store_debug_level(struct device_driver *ddp, @@ -657,14 +684,6 @@ static ssize_t ehca_show_adapter_handle(struct device *dev, } static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); -static ssize_t ehca_show_mr_largepage(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", ehca_mr_largepage); -} -static DEVICE_ATTR(mr_largepage, S_IRUGO, ehca_show_mr_largepage, NULL); - static struct attribute *ehca_dev_attrs[] = { &dev_attr_adapter_handle.attr, &dev_attr_num_ports.attr, @@ -681,7 +700,6 @@ static struct attribute *ehca_dev_attrs[] = { &dev_attr_cur_mw.attr, &dev_attr_max_pd.attr, &dev_attr_max_ah.attr, - &dev_attr_mr_largepage.attr, NULL }; @@ -695,7 +713,7 @@ static int __devinit ehca_probe(struct of_device *dev, struct ehca_shca *shca; const u64 *handle; struct ib_pd *ibpd; - int ret, i; + int ret, i, eq_size; handle = of_get_property(dev->node, "ibm,hca-handle", NULL); if (!handle) { @@ -716,6 +734,8 @@ static int __devinit ehca_probe(struct of_device *dev, return -ENOMEM; } mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); for (i = 0; i < ARRAY_SIZE(shca->sport); i++) spin_lock_init(&shca->sport[i].mod_sqp_lock); @@ -735,8 +755,9 @@ static int __devinit ehca_probe(struct of_device *dev, goto probe1; } + eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp; /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048); + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); if (ret) { ehca_err(&shca->ib_device, "Cannot create EQ."); goto probe1; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index f26997fc00f..f974367cad4 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ 
b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -323,7 +323,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } e_mr->umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags); + mr_access_flags, 0); if (IS_ERR(e_mr->umem)) { ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; @@ -1794,8 +1794,9 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list, int t; for (t = start_idx; t <= end_idx; t++) { u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; - ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr, - *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + if (ehca_debug_level >= 3) + ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr, + *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { ehca_gen_err("uncontiguous page found pgaddr=%lx " "prev_pgaddr=%lx page_list_i=%x", @@ -1862,10 +1863,13 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, pgaddr & ~(pginfo->hwpage_size - 1)); } - ehca_gen_dbg("kpage=%lx chunk_page=%lx " - "value=%016lx", *kpage, pgaddr, - *(u64 *)abs_to_virt( - phys_to_abs(pgaddr))); + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)abs_to_virt( + phys_to_abs(pgaddr)); + ehca_gen_dbg("kpage=%lx chunk_page=%lx " + "value=%016lx", + *kpage, pgaddr, val); + } prev_pgaddr = pgaddr; i++; pginfo->kpage_cnt++; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 3eb14a52cbf..18fba92fa7a 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -421,8 +421,18 @@ static struct ehca_qp *internal_create_qp( u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; - if (init_attr->create_flags) + if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) { + ehca_err(pd->device, "Unable to create QP, max number of %i " + "QPs reached.", ehca_max_qp); + ehca_err(pd->device, "To increase the maximum number of QPs " + "use the number_of_qps module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + if (init_attr->create_flags) { + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); + } memset(&parms, 0, sizeof(parms)); qp_type = init_attr->qp_type; @@ -431,6 +441,7 @@ static struct ehca_qp *internal_create_qp( init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", init_attr->sq_sig_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -455,6 +466,7 @@ static struct ehca_qp *internal_create_qp( if (is_llqp && has_srq) { ehca_err(pd->device, "LLQPs can't have an SRQ"); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -466,6 +478,7 @@ static struct ehca_qp *internal_create_qp( ehca_err(pd->device, "no more than three SGEs " "supported for SRQ pd=%p max_sge=%x", pd, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } } @@ -477,6 +490,7 @@ static struct ehca_qp *internal_create_qp( qp_type != IB_QPT_SMI && qp_type != IB_QPT_GSI) { ehca_err(pd->device, "wrong QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } @@ -490,6 +504,7 @@ static struct ehca_qp *internal_create_qp( "or max_rq_wr=%x for RC LLQP", init_attr->cap.max_send_wr, init_attr->cap.max_recv_wr); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; @@ -497,6 +512,7 @@ static struct ehca_qp *internal_create_qp( if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { ehca_err(pd->device, "UD LLQP not supported " "by this adapter"); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOSYS); } if 
(!(init_attr->cap.max_send_sge <= 5 @@ -508,20 +524,22 @@ static struct ehca_qp *internal_create_qp( "or max_recv_sge=%x for UD LLQP", init_attr->cap.max_send_sge, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } else if (init_attr->cap.max_send_wr > 255) { ehca_err(pd->device, "Invalid Number of " "max_send_wr=%x for UD QP_TYPE=%x", init_attr->cap.max_send_wr, qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } break; default: ehca_err(pd->device, "unsupported LL QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); - break; } } else { int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI @@ -533,6 +551,7 @@ static struct ehca_qp *internal_create_qp( "send_sge=%x recv_sge=%x max_sge=%x", init_attr->cap.max_send_sge, init_attr->cap.max_recv_sge, max_sge); + atomic_dec(&shca->num_qps); return ERR_PTR(-EINVAL); } } @@ -543,6 +562,7 @@ static struct ehca_qp *internal_create_qp( my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + atomic_dec(&shca->num_qps); return ERR_PTR(-ENOMEM); } @@ -550,6 +570,7 @@ static struct ehca_qp *internal_create_qp( spin_lock_init(&my_qp->spinlock_r); my_qp->qp_type = qp_type; my_qp->ext_type = parms.ext_type; + my_qp->state = IB_QPS_RESET; if (init_attr->recv_cq) my_qp->recv_cq = @@ -822,6 +843,7 @@ create_qp_exit1: create_qp_exit0: kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return ERR_PTR(ret); } @@ -965,7 +987,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); squeue = &my_qp->ipz_squeue; if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { @@ -978,7 +1000,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); *bad_wqe_cnt = 0; while (wqe->optype != 0xff && wqe->wqef != 0xff) { - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); wqe->nr_of_data_seg = 0; /* suppress data access */ wqe->wqef = WQEF_PURGE; /* WQE to be purged */ @@ -1450,7 +1472,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, /* no support for max_send/recv_sge yet */ } - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, @@ -1508,6 +1530,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_QKEY) my_qp->qkey = attr->qkey; + my_qp->state = qp_new_state; + modify_qp_exit2: if (squeue_locked) { /* this means: sqe -> rts */ spin_unlock_irqrestore(&my_qp->spinlock_s, flags); @@ -1763,7 +1787,7 @@ int ehca_query_qp(struct ib_qp *qp, if (qp_init_attr) *qp_init_attr = my_qp->init_attr; - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); query_qp_exit1: @@ -1811,7 +1835,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto modify_srq_exit0; } - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, @@ -1864,7 +1888,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) srq_attr->srq_limit = EHCA_BMASK_GET( 
MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); query_srq_exit1: @@ -1945,6 +1969,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (HAS_SQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index a20bbf46618..bbe0436f4f7 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -81,7 +81,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, recv_wr->sg_list[cnt_ds].length; } - if (ehca_debug_level) { + if (ehca_debug_level >= 3) { ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); @@ -281,7 +281,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, return -EINVAL; } - if (ehca_debug_level) { + if (ehca_debug_level >= 3) { ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); } @@ -421,6 +421,11 @@ int ehca_post_send(struct ib_qp *qp, int ret = 0; unsigned long flags; + if (unlikely(my_qp->state != IB_QPS_RTS)) { + ehca_err(qp->device, "QP not in RTS state qpn=%x", qp->qp_num); + return -EINVAL; + } + /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_s, flags); @@ -454,13 +459,14 @@ int ehca_post_send(struct ib_qp *qp, goto post_send_exit0; } wqe_cnt++; - ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); } /* eof for cur_send_wr */ post_send_exit0: iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, qp->qp_num, wqe_cnt, ret); my_qp->message_count += wqe_cnt; spin_unlock_irqrestore(&my_qp->spinlock_s, flags); return ret; @@ -520,13 +526,14 @@ static int internal_post_recv(struct ehca_qp *my_qp, goto post_recv_exit0; } wqe_cnt++; - ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, my_qp->real_qp_num, wqe_cnt); } /* eof for cur_recv_wr */ post_recv_exit0: iosync(); /* serialize GAL register access */ hipz_update_rqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, my_qp->real_qp_num, wqe_cnt, ret); spin_unlock_irqrestore(&my_qp->spinlock_r, flags); return ret; } @@ -570,16 +577,17 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; struct ehca_qp *my_qp; - int cqe_count = 0; + int cqe_count = 0, is_error; poll_cq_one_read_cqe: cqe = (struct ehca_cqe *) ipz_qeit_get_inc_valid(&my_cq->ipz_queue); if (!cqe) { ret = -EAGAIN; - ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p " - "cq_num=%x ret=%i", my_cq, my_cq->cq_number, ret); - goto poll_cq_one_exit0; + if (ehca_debug_level >= 3) + ehca_dbg(cq->device, "Completion queue is empty " + "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); + goto poll_cq_one_exit0; } /* prevents loads being reordered across this point */ @@ -609,7 +617,7 @@ poll_cq_one_read_cqe: ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(cqe, 64, "qp_num=%x 
src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); @@ -622,11 +630,13 @@ poll_cq_one_read_cqe: } } - /* tracing cqe */ - if (unlikely(ehca_debug_level)) { + is_error = cqe->status & WC_STATUS_ERROR_BIT; + + /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ + if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { ehca_dbg(cq->device, - "Received COMPLETION ehca_cq=%p cq_num=%x -----", - my_cq, my_cq->cq_number); + "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", + is_error ? "ERROR " : "", my_cq, my_cq->cq_number); ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); ehca_dbg(cq->device, @@ -649,8 +659,9 @@ poll_cq_one_read_cqe: /* update also queue adder to throw away this entry!!! */ goto poll_cq_one_exit0; } + /* eval ib_wc_status */ - if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) { + if (unlikely(is_error)) { /* complete with errors */ map_ib_wc_status(cqe->status, &wc->status); wc->vendor_err = wc->status; @@ -671,14 +682,6 @@ poll_cq_one_read_cqe: wc->imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; - if (unlikely(wc->status != IB_WC_SUCCESS)) - ehca_dbg(cq->device, - "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " - "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " - "cqe=%p", my_cq, my_cq->cq_number, cqe->optype, - cqe->status, cqe->local_qp_number, - cqe->remote_qp_number, cqe->work_request_id, cqe); - poll_cq_one_exit0: if (cqe_count > 0) hipz_update_feca(my_cq, cqe_count); diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 1b07f2beafa..e43ed8f8a0c 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -211,8 +211,7 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, break; case 1: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue", - qp->ib_qp.qp_num); + ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue); if (unlikely(ret)) { @@ -224,8 +223,7 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, break; case 2: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue", - qp->ib_qp.qp_num); + ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue); if (unlikely(ret)) { diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 7029aa65375..5245e13c3a3 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -123,8 +123,9 @@ static long ehca_plpar_hcall_norets(unsigned long opcode, int i, sleep_msecs; unsigned long flags = 0; - ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); for (i = 0; i < 5; i++) { /* serialize hCalls to work around firmware issue */ @@ -148,7 +149,8 @@ static long ehca_plpar_hcall_norets(unsigned long opcode, opcode, ret, arg1, arg2, arg3, arg4, arg5, arg6, arg7); else - ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); return ret; } @@ -172,8 +174,10 @@ static long ehca_plpar_hcall9(unsigned long opcode, int i, sleep_msecs; unsigned long flags = 0; - ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, 
opcode, - arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); for (i = 0; i < 5; i++) { /* serialize hCalls to work around firmware issue */ @@ -201,7 +205,7 @@ static long ehca_plpar_hcall9(unsigned long opcode, ret, outs[0], outs[1], outs[2], outs[3], outs[4], outs[5], outs[6], outs[7], outs[8]); - } else + } else if (unlikely(ehca_debug_level >= 2)) ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, ret, outs[0], outs[1], outs[2], outs[3], outs[4], outs[5], outs[6], outs[7], @@ -381,7 +385,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, r_cb, /* r6 */ 0, 0, 0, 0); - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(query_port_response_block, 64, "response_block"); return ret; @@ -731,9 +735,6 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, u64 ret; u64 outs[PLPAR_HCALL9_BUFSIZE]; - ehca_gen_dbg("kernel PAGE_SIZE=%x access_ctrl=%016x " - "vaddr=%lx length=%lx", - (u32)PAGE_SIZE, access_ctrl, vaddr, length); ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ 5, /* r5 */ @@ -758,7 +759,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, { u64 ret; - if (unlikely(ehca_debug_level >= 2)) { + if (unlikely(ehca_debug_level >= 3)) { if (count > 1) { u64 *kpage; int i; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index db4ba92f79f..9d343b7c2f3 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -195,7 +195,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto bail; } - umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); if (IS_ERR(umem)) return (void *) umem; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 3557e7edc9b..4521319b140 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -137,7 +137,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont int err; *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), - IB_ACCESS_LOCAL_WRITE); + IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -204,7 +204,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &to_mucontext(context)->uar; } else { - err = mlx4_ib_db_alloc(dev, &cq->db, 1); + err = mlx4_db_alloc(dev->dev, &cq->db, 1); if (err) goto err_cq; @@ -221,7 +221,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector } err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq); + cq->db.dma, &cq->mcq, 0); if (err) goto err_dbmap; @@ -246,11 +246,11 @@ err_mtt: if (context) ib_umem_release(cq->umem); else - mlx4_ib_free_cq_buf(dev, &cq->buf, entries); + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); err_db: if (!context) - mlx4_ib_db_free(dev, &cq->db); + mlx4_db_free(dev->dev, &cq->db); err_cq: kfree(cq); @@ -434,8 +434,8 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq) mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); ib_umem_release(mcq->umem); } else { - mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1); - mlx4_ib_db_free(dev, &mcq->db); + mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); 
+ mlx4_db_free(dev->dev, &mcq->db); } kfree(mcq); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 1c36087aef1..8aee4233b38 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -34,124 +34,6 @@ #include "mlx4_ib.h" -struct mlx4_ib_db_pgdir { - struct list_head list; - DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE); - DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2); - unsigned long *bits[2]; - __be32 *db_page; - dma_addr_t db_dma; -}; - -static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev) -{ - struct mlx4_ib_db_pgdir *pgdir; - - pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); - if (!pgdir) - return NULL; - - bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2); - pgdir->bits[0] = pgdir->order0; - pgdir->bits[1] = pgdir->order1; - pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device, - PAGE_SIZE, &pgdir->db_dma, - GFP_KERNEL); - if (!pgdir->db_page) { - kfree(pgdir); - return NULL; - } - - return pgdir; -} - -static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir, - struct mlx4_ib_db *db, int order) -{ - int o; - int i; - - for (o = order; o <= 1; ++o) { - i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o); - if (i < MLX4_IB_DB_PER_PAGE >> o) - goto found; - } - - return -ENOMEM; - -found: - clear_bit(i, pgdir->bits[o]); - - i <<= o; - - if (o > order) - set_bit(i ^ 1, pgdir->bits[order]); - - db->u.pgdir = pgdir; - db->index = i; - db->db = pgdir->db_page + db->index; - db->dma = pgdir->db_dma + db->index * 4; - db->order = order; - - return 0; -} - -int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order) -{ - struct mlx4_ib_db_pgdir *pgdir; - int ret = 0; - - mutex_lock(&dev->pgdir_mutex); - - list_for_each_entry(pgdir, &dev->pgdir_list, list) - if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order)) - goto out; - - pgdir = mlx4_ib_alloc_db_pgdir(dev); - if (!pgdir) { - ret = -ENOMEM; - goto out; - } - - list_add(&pgdir->list, &dev->pgdir_list); - - /* This should never fail -- we just allocated an empty page: */ - WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order)); - -out: - mutex_unlock(&dev->pgdir_mutex); - - return ret; -} - -void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db) -{ - int o; - int i; - - mutex_lock(&dev->pgdir_mutex); - - o = db->order; - i = db->index; - - if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) { - clear_bit(i ^ 1, db->u.pgdir->order0); - ++o; - } - - i >>= o; - set_bit(i, db->u.pgdir->bits[o]); - - if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) { - dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE, - db->u.pgdir->db_page, db->u.pgdir->db_dma); - list_del(&db->u.pgdir->list); - kfree(db->u.pgdir); - } - - mutex_unlock(&dev->pgdir_mutex); -} - struct mlx4_ib_user_db_page { struct list_head list; struct ib_umem *umem; @@ -160,7 +42,7 @@ struct mlx4_ib_user_db_page { }; int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, - struct mlx4_ib_db *db) + struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; struct ib_umem_chunk *chunk; @@ -181,7 +63,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, - PAGE_SIZE, 0); + PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); @@ -202,7 +84,7 @@ out: return err; } -void mlx4_ib_db_unmap_user(struct 
mlx4_ib_ucontext *context, struct mlx4_ib_db *db) +void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db) { mutex_lock(&context->db_page_mutex); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4d9b5ac4220..4d61e32866c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -557,9 +557,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_uar; MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); - INIT_LIST_HEAD(&ibdev->pgdir_list); - mutex_init(&ibdev->pgdir_mutex); - ibdev->dev = dev; strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 9e637323c15..5cf994794d2 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -43,24 +43,6 @@ #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> -enum { - MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4 -}; - -struct mlx4_ib_db_pgdir; -struct mlx4_ib_user_db_page; - -struct mlx4_ib_db { - __be32 *db; - union { - struct mlx4_ib_db_pgdir *pgdir; - struct mlx4_ib_user_db_page *user_page; - } u; - dma_addr_t dma; - int index; - int order; -}; - struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; @@ -88,7 +70,7 @@ struct mlx4_ib_cq { struct mlx4_cq mcq; struct mlx4_ib_cq_buf buf; struct mlx4_ib_cq_resize *resize_buf; - struct mlx4_ib_db db; + struct mlx4_db db; spinlock_t lock; struct mutex resize_mutex; struct ib_umem *umem; @@ -127,7 +109,7 @@ struct mlx4_ib_qp { struct mlx4_qp mqp; struct mlx4_buf buf; - struct mlx4_ib_db db; + struct mlx4_db db; struct mlx4_ib_wq rq; u32 doorbell_qpn; @@ -154,7 +136,7 @@ struct mlx4_ib_srq { struct ib_srq ibsrq; struct mlx4_srq msrq; struct mlx4_buf buf; - struct mlx4_ib_db db; + struct mlx4_db db; u64 *wrid; spinlock_t lock; int head; @@ -175,9 +157,6 @@ struct mlx4_ib_dev { struct mlx4_dev *dev; void __iomem *uar_map; - struct list_head pgdir_list; - struct mutex pgdir_mutex; - struct mlx4_uar priv_uar; u32 priv_pdn; MLX4_DECLARE_DOORBELL_LOCK(uar_lock); @@ -248,11 +227,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx4_ib_ah, ibah); } -int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order); -void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db); int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, - struct mlx4_ib_db *db); -void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db); + struct mlx4_db *db); +void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc); int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index fe2c2e94a5f..68e92485fc7 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -132,7 +132,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags); + mr->umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b75efae7e44..8e02ecfec18 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ 
b/drivers/infiniband/hw/mlx4/qp.c @@ -482,7 +482,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0); + qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -514,7 +514,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (!init_attr->srq) { - err = mlx4_ib_db_alloc(dev, &qp->db, 0); + err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -580,7 +580,7 @@ err_buf: err_db: if (!pd->uobject && !init_attr->srq) - mlx4_ib_db_free(dev, &qp->db); + mlx4_db_free(dev->dev, &qp->db); err: return err; @@ -666,7 +666,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, kfree(qp->rq.wrid); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); if (!qp->ibqp.srq) - mlx4_ib_db_free(dev, &qp->db); + mlx4_db_free(dev->dev, &qp->db); } } diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index beaa3b06cf5..12d6bc6f800 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -109,7 +109,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - buf_size, 0); + buf_size, 0, 0); if (IS_ERR(srq->umem)) { err = PTR_ERR(srq->umem); goto err_srq; @@ -129,7 +129,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_ib_db_alloc(dev, &srq->db, 0); + err = mlx4_db_alloc(dev->dev, &srq->db, 0); if (err) goto err_srq; @@ -200,7 +200,7 @@ err_buf: err_db: if (!pd->uobject) - mlx4_ib_db_free(dev, &srq->db); + mlx4_db_free(dev->dev, &srq->db); err_srq: kfree(srq); @@ -267,7 +267,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) kfree(msrq->wrid); mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, &msrq->buf); - mlx4_ib_db_free(dev, &msrq->db); + mlx4_db_free(dev->dev, &msrq->db); } kfree(msrq); diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 3538da16e3f..820205dec56 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -818,15 +818,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) { - u32 key; - if (!fmr->maps) return; - key = tavor_key_to_hw_index(fmr->ibmr.lkey); - key &= dev->limits.num_mpts - 1; - fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key); - fmr->maps = 0; writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); @@ -834,16 +828,9 @@ void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) { - u32 key; - if (!fmr->maps) return; - key = arbel_key_to_hw_index(fmr->ibmr.lkey); - key &= dev->limits.num_mpts - 1; - key = adjust_key(dev, key); - fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); - fmr->maps = 0; *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 696e1f30233..be34f99ca62 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -39,6 +39,8 @@ #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> + +#include <linux/sched.h> #include <linux/mm.h> #include "mthca_dev.h" @@ -367,6 +369,8 @@ static struct ib_ucontext 
*mthca_alloc_ucontext(struct ib_device *ibdev, return ERR_PTR(-EFAULT); } + context->reg_mr_warned = 0; + return &context->ibucontext; } @@ -1006,17 +1010,31 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct mthca_dev *dev = to_mdev(pd->device); struct ib_umem_chunk *chunk; struct mthca_mr *mr; + struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; int i, j, k; int err = 0; int write_mtt_size; + if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) { + if (!to_mucontext(pd->uobject->context)->reg_mr_warned) { + mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n", + current->comm); + mthca_warn(dev, " Update libmthca to fix this.\n"); + } + ++to_mucontext(pd->uobject->context)->reg_mr_warned; + ucmd.mr_attrs = 0; + } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + mr = kmalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->uobject->context, start, length, acc); + mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, + ucmd.mr_attrs & MTHCA_MR_DMASYNC); + if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 262616c8ebb..934bf954403 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -67,6 +67,7 @@ struct mthca_ucontext { struct ib_ucontext ibucontext; struct mthca_uar uar; struct mthca_user_db_table *db_tab; + int reg_mr_warned; }; struct mthca_mtt; diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index 02cc0a766f3..e1262c942db 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -61,6 +61,16 @@ struct mthca_alloc_pd_resp { __u32 reserved; }; +struct mthca_reg_mr { +/* + * Mark the memory region with a DMA attribute that causes + * in-flight DMA to be flushed when the region is written to: + */ +#define MTHCA_MR_DMASYNC 0x1 + __u32 mr_attrs; + __u32 reserved; +}; + struct mthca_create_cq { __u32 lkey; __u32 pdn; diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig index 2aeb7ac972a..d449eb6ec78 100644 --- a/drivers/infiniband/hw/nes/Kconfig +++ b/drivers/infiniband/hw/nes/Kconfig @@ -2,6 +2,7 @@ config INFINIBAND_NES tristate "NetEffect RNIC Driver" depends on PCI && INET && INFINIBAND select LIBCRC32C + select INET_LRO ---help--- This is a low-level driver for NetEffect RDMA enabled Network Interface Cards (RNIC). 
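The mthca_reg_user_mr() hunk above shows the idiom this series uses to extend a userspace verbs ABI without breaking old libraries: the kernel sizes udata->inlen to detect a short (old-format) command, warns once per ucontext, and falls back to mr_attrs = 0 instead of failing the call. A minimal sketch of that idiom follows; "foo" is a hypothetical driver, and foo_reg_mr/FOO_MR_DMASYNC are invented names modeled on struct mthca_reg_mr, while the five-argument ib_umem_get() is the form used by the call sites in this diff:

struct foo_reg_mr {
	__u32 mr_attrs;		/* FOO_MR_DMASYNC asks for the DMA write barrier */
	__u32 reserved;
};
#define FOO_MR_DMASYNC 0x1

static struct ib_mr *foo_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				     u64 virt_addr, int acc, struct ib_udata *udata)
{
	struct ib_umem *umem;
	struct foo_reg_mr ucmd;

	if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd)
		ucmd.mr_attrs = 0;	/* old library sent no attribute word */
	else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
		return ERR_PTR(-EFAULT);

	/* dmasync is a plain int flag on ib_umem_get(); most callers pass 0 */
	umem = ib_umem_get(pd->uobject->context, start, length, acc,
			   !!(ucmd.mr_attrs & FOO_MR_DMASYNC));
	if (IS_ERR(umem))
		return (void *) umem;

	/* ... device-specific MTT/MPT programming would follow here ... */

	ib_umem_release(umem);
	return ERR_PTR(-ENOSYS);	/* sketch only, no real backend */
}

Only two kinds of call sites in this diff actually request the barrier: the mlx4 CQ buffer (which passes dmasync = 1 unconditionally) and mthca user MRs registered with MTHCA_MR_DMASYNC; every other ib_umem_get() caller passes 0.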
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index b046262ed63..9f7364a9096 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -91,6 +91,10 @@ unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); +unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; +module_param(nes_lro_max_aggr, uint, 0444); +MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); + LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); @@ -139,8 +143,9 @@ static int nes_inetaddr_event(struct notifier_block *notifier, addr = ntohl(ifa->ifa_address); mask = ntohl(ifa->ifa_mask); - nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %08X, netmask %08X.\n", - addr, mask); + nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address " NIPQUAD_FMT + ", netmask " NIPQUAD_FMT ".\n", + HIPQUAD(addr), HIPQUAD(mask)); list_for_each_entry(nesdev, &nes_dev_list, list) { nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n", nesdev, nesdev->netdev[0]->name); @@ -353,13 +358,11 @@ struct ib_qp *nes_get_qp(struct ib_device *device, int qpn) */ static void nes_print_macaddr(struct net_device *netdev) { - nes_debug(NES_DBG_INIT, "%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, IRQ %u\n", - netdev->name, - netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], - netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5], - netdev->irq); -} + DECLARE_MAC_BUF(mac); + nes_debug(NES_DBG_INIT, "%s: %s, IRQ %u\n", + netdev->name, print_mac(mac, netdev->dev_addr), netdev->irq); +} /** * nes_interrupt - handle interrupts diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index cdf2e9ad62f..1f9f7bf7386 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -173,6 +173,7 @@ extern int disable_mpa_crc; extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; +extern unsigned int nes_lro_max_aggr; extern struct list_head nes_adapter_list; @@ -535,8 +536,8 @@ int nes_register_ofa_device(struct nes_ib_device *); int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *); void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16); void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); -void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16); -void nes_read_10G_phy_reg(struct nes_device *, u16, u8); +void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16); +void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16); struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); int nes_arp_table(struct nes_device *, u32, u8 *, u32); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index d0738623bcf..9a4b40fae40 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -594,7 +594,7 @@ static void nes_cm_timer_tick(unsigned long pass) continue; } /* this seems like the correct place, but leave send entry unprotected */ - // spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */ atomic_inc(&send_entry->skb->users); cm_packets_retrans++; nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," @@ -852,8 +852,8 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, 
/* get a handle on the hte */ hte = &cm_core->connected_nodes; - nes_debug(NES_DBG_CM, "Searching for an owner node:%x:%x from core %p->%p\n", - loc_addr, loc_port, cm_core, hte); + nes_debug(NES_DBG_CM, "Searching for an owner node: " NIPQUAD_FMT ":%x from core %p->%p\n", + HIPQUAD(loc_addr), loc_port, cm_core, hte); /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->ht_lock, flags); @@ -902,8 +902,8 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, } spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - nes_debug(NES_DBG_CM, "Unable to find listener- %x:%x\n", - dst_addr, dst_port); + nes_debug(NES_DBG_CM, "Unable to find listener for " NIPQUAD_FMT ":%x\n", + HIPQUAD(dst_addr), dst_port); /* no listener */ return NULL; @@ -1054,6 +1054,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, int arpindex = 0; struct nes_device *nesdev; struct nes_adapter *nesadapter; + DECLARE_MAC_BUF(mac); /* create an hte and cm_node for this instance */ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); @@ -1066,8 +1067,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->send_write0 = send_first; - nes_debug(NES_DBG_CM, "Make node addresses : loc = %x:%x, rem = %x:%x\n", - cm_node->loc_addr, cm_node->loc_port, cm_node->rem_addr, cm_node->rem_port); + nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n", + HIPQUAD(cm_node->loc_addr), cm_node->loc_port, + HIPQUAD(cm_node->rem_addr), cm_node->rem_port); cm_node->listener = listener; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; @@ -1116,11 +1118,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, /* copy the mac addr to node context */ memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN); - nes_debug(NES_DBG_CM, "Remote mac addr from arp table:%02x," - " %02x, %02x, %02x, %02x, %02x\n", - cm_node->rem_mac[0], cm_node->rem_mac[1], - cm_node->rem_mac[2], cm_node->rem_mac[3], - cm_node->rem_mac[4], cm_node->rem_mac[5]); + nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %s\n", + print_mac(mac, cm_node->rem_mac)); add_hte_node(cm_core, cm_node); atomic_inc(&cm_nodes_created); @@ -1336,7 +1335,7 @@ static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->loc_addr, cm_node->loc_port, cm_node->rem_addr, cm_node->rem_port, cm_node->state, atomic_read(&cm_node->ref_count)); - // create event + /* create event */ cm_node->state = NES_CM_STATE_CLOSED; create_event(cm_node, NES_CM_EVENT_ABORTED); @@ -1670,7 +1669,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (!cm_node) return NULL; - // set our node side to client (active) side + /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; @@ -1695,7 +1694,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, loopbackremotenode->mpa_frame_size = mpa_frame_size - sizeof(struct ietf_mpa_frame); - // we are done handling this state, set node to a TSA state + /* we are done handling this state, set node to a TSA state */ cm_node->state = NES_CM_STATE_TSA; cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; @@ -1850,8 +1849,10 @@ static int 
mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni nfo.rem_addr = ntohl(iph->saddr); nfo.rem_port = ntohs(tcph->source); - nes_debug(NES_DBG_CM, "Received packet: dest=0x%08X:0x%04X src=0x%08X:0x%04X\n", - iph->daddr, tcph->dest, iph->saddr, tcph->source); + nes_debug(NES_DBG_CM, "Received packet: dest=" NIPQUAD_FMT + ":0x%04X src=" NIPQUAD_FMT ":0x%04X\n", + NIPQUAD(iph->daddr), tcph->dest, + NIPQUAD(iph->saddr), tcph->source); /* note: this call is going to increment cm_node ref count */ cm_node = find_node(cm_core, diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index aa53aab91bf..8dc70f9bad2 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -38,6 +38,7 @@ #include <linux/ip.h> #include <linux/tcp.h> #include <linux/if_vlan.h> +#include <linux/inet_lro.h> #include "nes.h" @@ -636,6 +637,15 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n"); return 0; } + + i = 0; + while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) + mdelay(1); + if (i >= 10000) { + printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", + nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); + return 0; + } } /* port reset */ @@ -684,17 +694,6 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ } } - - - i = 0; - while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) - mdelay(1); - if (i >= 10000) { - printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", - nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); - return 0; - } - return port_count; } @@ -834,7 +833,7 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou nes_write_indexed(nesdev, 0x00000900, 0x20000001); nes_write_indexed(nesdev, 0x000060C0, 0x0000028e); nes_write_indexed(nesdev, 0x000060C8, 0x00000020); - // + nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0); /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */ @@ -1209,11 +1208,16 @@ int nes_init_phy(struct nes_device *nesdev) { struct nes_adapter *nesadapter = nesdev->nesadapter; u32 counter = 0; + u32 sds_common_control0; u32 mac_index = nesdev->mac_index; - u32 tx_config; + u32 tx_config = 0; u16 phy_data; + u32 temp_phy_data = 0; + u32 temp_phy_data2 = 0; + u32 i = 0; - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index); if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { printk(PFX "%s: Programming mdc config for 1G\n", __func__); @@ -1225,7 +1229,7 @@ int nes_init_phy(struct nes_device *nesdev) nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data); nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n", nesadapter->phy_index[mac_index], phy_data); - nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); + nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); /* Reset the PHY */ nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000); @@ -1279,12 +1283,126 @@ int nes_init_phy(struct nes_device *nesdev) nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300); } else { - if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { + 
if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) || + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { /* setup 10G MDIO operation */ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); tx_config |= 0x14; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); } + if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + mdelay(10); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + /* + * if firmware is already running (like from a + * driver un-load/load), don't do anything. + */ + if (temp_phy_data == temp_phy_data2) { + /* configure QT2505 AMCC PHY */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528); + + /* + * remove micro from reset; chip boots from ROM, + * uploads EEPROM f/w image, uC executes f/w + */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002); + + /* + * wait for heartbeat to start to + * know loading is done + */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 1000) { + nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n"); + break; + } + mdelay(100); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } while ((temp_phy_data2 == temp_phy_data)); + + /* + * wait for tracking to start to know + * f/w is good to go + */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 1000) { + nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n"); + break; + } + mdelay(1000); + /* + * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n", + * temp_phy_data); + */ + } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70)); + + /* set LOS Control invert RXLOSB_I_PADINV */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000); + /* set LOS Control to mask of RXLOSB_I */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042); + /* set LED1 to input mode (LED1 and LED2 share same LED) */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007); + /* set LED2 to RX link_status and activity */ + nes_write_10G_phy_reg(nesdev, 
nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A); + /* set LED3 to RX link_status */ + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009); + + /* + * reset the res-calibration on t2 + * serdes; ensures it is stable after + * the amcc phy is stable + */ + + sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); + sds_common_control0 |= 0x1; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); + + /* release the res-calibration reset */ + sds_common_control0 &= 0xfffffffe; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); + + i = 0; + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) + && (i++ < 5000)) { + /* mdelay(1); */ + } + + /* + * wait for link train done before moving on, + * or will get an interrupt storm + */ + counter = 0; + do { + temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1))); + if (counter++ > 1000) { + nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didn't train!!!>\n"); + break; + } + mdelay(1); + } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000)); + } + } } return 0; } @@ -1377,6 +1495,25 @@ static void nes_rq_wqes_timeout(unsigned long parm) } +static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + return 0; +} + + /** * nes_init_nic_qp */ @@ -1522,10 +1659,10 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) } u64temp = (u64)nesvnic->nic.sq_pbase; - nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); u64temp = (u64)nesvnic->nic.rq_pbase; - nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | @@ -1577,7 +1714,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) nic_rqe = &nesvnic->nic.rq_vbase[counter]; nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; - nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); nesvnic->nic.rx_skb[counter] = skb; } @@ -1594,15 +1731,21 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout; nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic; nes_debug(NES_DBG_INIT, "NAPI support Enabled\n"); - if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) { nes_nic_init_timer(nesdev); if (netdev->mtu > 1500) jumbomode = 1; - nes_nic_init_timer_defaults(nesdev, jumbomode); 
- } - + nes_nic_init_timer_defaults(nesdev, jumbomode); + } + nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR; + nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; + nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; + nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr; + nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; + nesvnic->lro_mgr.dev = netdev; + nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; + nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; return 0; } @@ -1622,8 +1765,8 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) /* Free remaining NIC receive buffers */ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { - nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; - wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); + nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; + wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); @@ -1706,17 +1849,17 @@ int nes_napi_isr(struct nes_device *nesdev) /* iff NIC, process here, else wait for DPC */ if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) { nesdev->napi_isr_ran = 0; - nes_write32(nesdev->regs+NES_INT_STAT, - (int_stat & - ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3))); + nes_write32(nesdev->regs + NES_INT_STAT, + (int_stat & + ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); /* Process the CEQs */ nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]); if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) && - (!nesadapter->et_use_adaptive_rx_coalesce)) || - ((nesadapter->et_use_adaptive_rx_coalesce) && - (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) { + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesadapter->et_use_adaptive_rx_coalesce) && + (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) { if ((nesdev->int_req & NES_INT_TIMER) == 0) { /* Enable Periodic timer interrupts */ nesdev->int_req |= NES_INT_TIMER; @@ -1794,12 +1937,12 @@ void nes_dpc(unsigned long param) } if (int_stat) { - if (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| - NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)) { + if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| + NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) { /* Ack the interrupts */ nes_write32(nesdev->regs+NES_INT_STAT, - (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| - NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3))); + (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0| + NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3))); } temp_int_stat = int_stat; @@ -1864,8 +2007,8 @@ void nes_dpc(unsigned long param) } } /* Don't use the interface interrupt bit stay in loop */ - int_stat &= ~NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0| - NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3; + int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | + NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3; } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS)); if (timer_ints == 1) { @@ -1876,9 +2019,9 @@ void nes_dpc(unsigned long param) nesdev->timer_only_int_count = 0; nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); - nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req); } else { - 
nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); } } else { if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) @@ -1886,7 +2029,7 @@ void nes_dpc(unsigned long param) nes_nic_init_timer(nesdev); } nesdev->timer_only_int_count = 0; - nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); + nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); } } else { nesdev->timer_only_int_count = 0; @@ -1935,7 +2078,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) do { if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) & NES_CEQE_VALID) { - u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX])))<<32) | + u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) | ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX]))); u64temp <<= 1; cq = *((struct nes_hw_cq **)&u64temp); @@ -1963,7 +2106,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) */ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) { -// u64 u64temp; + /* u64 u64temp; */ u32 head; u32 aeq_size; u32 aeqe_misc; @@ -1982,8 +2125,10 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) { if (aeqe_cq_id >= NES_FIRST_QPN) { /* dealing with an accelerated QP related AE */ -// u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])))<<32) | -// ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]))); + /* + * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) | + * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]))); + */ nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe); } else { /* TODO: dealing with a CQP related AE */ @@ -2083,6 +2228,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) u32 u32temp; u16 phy_data; u16 temp_phy_data; + u32 pcs_val = 0x0f0f0000; + u32 pcs_mask = 0x0f1f0000; spin_lock_irqsave(&nesadapter->phy_lock, flags); if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) { @@ -2146,13 +2293,30 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n", nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0), nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200)); - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200)); - pcs_control_status = nes_read_indexed(nesdev, - NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200)); + + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) { + switch (mac_index) { + case 1: + case 3: + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200); + break; + default: + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0); + break; + } + } else { + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); + pcs_control_status = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200)); + } + nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n", mac_index, pcs_control_status); - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + 
(nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { u32temp = 0x01010000; if (nesadapter->port_count > 2) { u32temp |= 0x02020000; @@ -2161,24 +2325,59 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) phy_data = 0; nes_debug(NES_DBG_PHY, "PCS says the link is down\n"); } - } else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { - nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); - temp_phy_data = (u16)nes_read_indexed(nesdev, - NES_IDX_MAC_MDIO_CONTROL); - u32temp = 20; - do { - nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); - phy_data = (u16)nes_read_indexed(nesdev, - NES_IDX_MAC_MDIO_CONTROL); - if ((phy_data == temp_phy_data) || (!(--u32temp))) - break; - temp_phy_data = phy_data; - } while (1); - nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", - __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); - } else { - phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0; + switch (nesadapter->phy_type[mac_index]) { + case NES_PHY_TYPE_IRIS: + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); + break; + + case NES_PHY_TYPE_ARGUS: + /* clear the alarms */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005); + /* check link status */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + u32temp = 100; + do { + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); + break; + + case NES_PHY_TYPE_PUMA_1G: + if (mac_index < 2) + pcs_val = pcs_mask = 0x01010000; + else + pcs_val = pcs_mask = 0x02020000; + /* fall through */ + default: + phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0; + break; + } } if (phy_data & 0x0004) { @@ -2187,8 +2386,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_debug(NES_DBG_PHY, "The Link is UP!!. 
linkup was %d\n", nesvnic->linkup); if (nesvnic->linkup == 0) { - printk(PFX "The Link is now up for port %u, netdev %p.\n", - mac_index, nesvnic->netdev); + printk(PFX "The Link is now up for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); if (netif_queue_stopped(nesvnic->netdev)) netif_start_queue(nesvnic->netdev); nesvnic->linkup = 1; @@ -2201,8 +2400,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n", nesvnic->linkup); if (nesvnic->linkup == 1) { - printk(PFX "The Link is now down for port %u, netdev %p.\n", - mac_index, nesvnic->netdev); + printk(PFX "The Link is now down for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); if (!(netif_queue_stopped(nesvnic->netdev))) netif_stop_queue(nesvnic->netdev); nesvnic->linkup = 0; @@ -2256,10 +2455,13 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) u16 pkt_type; u16 rqes_processed = 0; u8 sq_cqes = 0; + u8 nes_use_lro = 0; head = cq->cq_head; cq_size = cq->cq_size; cq->cqes_pending = 1; + if (nesvnic->netdev->features & NETIF_F_LRO) + nes_use_lro = 1; do { if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & NES_NIC_CQE_VALID) { @@ -2274,8 +2476,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) /* bump past the vlan tag */ wqe_fragment_length++; if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { - u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]); - u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32; + u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + + wqe_fragment_index * 2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index * 2])) << 32; bus_address = (dma_addr_t)u64temp; if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) { pci_unmap_single(nesdev->pcidev, @@ -2285,8 +2489,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } for (; wqe_fragment_index < 5; wqe_fragment_index++) { if (wqe_fragment_length[wqe_fragment_index]) { - u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]); - u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32; + u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX + + wqe_fragment_index * 2]); + u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index * 2])) <<32; bus_address = (dma_addr_t)u64temp; pci_unmap_page(nesdev->pcidev, bus_address, @@ -2333,7 +2539,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) { nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); -// nesadapter->tune_timer.cq_count += cqe_count; + /* nesadapter->tune_timer.cq_count += cqe_count; */ nesdev->currcq_count += cqe_count; cqe_count = 0; nes_replenish_nic_rq(nesvnic); @@ -2381,9 +2587,16 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) >> 16); nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. 
Tag = 0x%04X\n", nesvnic->netdev->name, vlan_tag); - nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); + if (nes_use_lro) + lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb, + nesvnic->vlan_grp, vlan_tag, NULL); + else + nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); } else { - nes_netif_rx(rx_skb); + if (nes_use_lro) + lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); + else + nes_netif_rx(rx_skb); } } @@ -2401,7 +2614,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) /* Replenish Nic CQ */ nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); -// nesdev->nesadapter->tune_timer.cq_count += cqe_count; + /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ nesdev->currcq_count += cqe_count; cqe_count = 0; } @@ -2415,26 +2628,27 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } while (1); + if (nes_use_lro) + lro_flush_all(&nesvnic->lro_mgr); if (sq_cqes) { barrier(); /* restart the queue if it had been stopped */ if (netif_queue_stopped(nesvnic->netdev)) netif_wake_queue(nesvnic->netdev); } - cq->cq_head = head; /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n", cq->cq_number, cqe_count, cq->cq_head); */ cq->cqe_allocs_pending = cqe_count; if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) { -// nesdev->nesadapter->tune_timer.cq_count += cqe_count; + /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */ nesdev->currcq_count += cqe_count; nes_nic_tune_timer(nesdev); } if (atomic_read(&nesvnic->rx_skbs_needed)) nes_replenish_nic_rq(nesvnic); - } +} /** @@ -2463,7 +2677,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | + cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | ((u64)(le32_to_cpu(cq->cq_vbase[head]. cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); cqp = *((struct nes_hw_cqp **)&u64temp); @@ -2480,7 +2694,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) } u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. - wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) | + wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. 
wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]))); cqp_request = *((struct nes_cqp_request **)&u64temp); @@ -2517,7 +2731,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) } else { nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n", cqp_request, - le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); + le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f); if (cqp_request->dynamic) { kfree(cqp_request); } else { @@ -2531,7 +2745,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) } cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; - nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (1 << 16)); + nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16)); if (++cqp->sq_tail >= cqp->sq_size) cqp->sq_tail = 0; @@ -2600,13 +2814,13 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nes_debug(NES_DBG_AEQ, "\n"); aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) { - context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); + context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; } else { aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; context = (unsigned long)nesadapter->qp_table[le32_to_cpu( - aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; BUG_ON(!context); } @@ -2619,7 +2833,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); - switch (async_event_id) { case NES_AEQE_AEID_LLP_FIN_RECEIVED: nesqp = *((struct nes_qp **)&context); @@ -3023,7 +3236,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32( (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) | - (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); + (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]); cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32( (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]); } else { diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index b7e2844f096..745bf94f3f0 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -33,8 +33,12 @@ #ifndef __NES_HW_H #define __NES_HW_H -#define NES_PHY_TYPE_1G 2 -#define NES_PHY_TYPE_IRIS 3 +#include <linux/inet_lro.h> + +#define NES_PHY_TYPE_1G 2 +#define NES_PHY_TYPE_IRIS 3 +#define NES_PHY_TYPE_ARGUS 4 +#define NES_PHY_TYPE_PUMA_1G 5 #define NES_PHY_TYPE_PUMA_10G 6 #define NES_MULTICAST_PF_MAX 8 @@ -905,7 +909,7 @@ struct nes_hw_qp { }; struct nes_hw_cq { - struct nes_hw_cqe volatile *cq_vbase; /* PCI memory for host rings */ + struct nes_hw_cqe *cq_vbase; /* PCI memory for host rings */ void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq); dma_addr_t cq_pbase; /* PCI memory for host rings */ u16 cq_head; @@ -965,7 +969,7 @@ struct nes_arp_entry { #define NES_NIC_CQ_DOWNWARD_TREND 16 struct nes_hw_tune_timer { - //u16 cq_count; + /* u16 
cq_count; */ u16 threshold_low; u16 threshold_target; u16 threshold_high; @@ -982,8 +986,10 @@ struct nes_hw_tune_timer { #define NES_TIMER_INT_LIMIT 2 #define NES_TIMER_INT_LIMIT_DYNAMIC 10 #define NES_TIMER_ENABLE_LIMIT 4 -#define NES_MAX_LINK_INTERRUPTS 128 -#define NES_MAX_LINK_CHECK 200 +#define NES_MAX_LINK_INTERRUPTS 128 +#define NES_MAX_LINK_CHECK 200 +#define NES_MAX_LRO_DESCRIPTORS 32 +#define NES_LRO_MAX_AGGR 64 struct nes_adapter { u64 fw_ver; @@ -1183,6 +1189,9 @@ struct nes_vnic { u8 of_device_registered; u8 rdma_enabled; u8 rx_checksum_disabled; + u32 lro_max_aggr; + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; }; struct nes_ib_device { diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 01cd0effc49..1b0938c8777 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -185,12 +185,13 @@ static int nes_netdev_open(struct net_device *netdev) nic_active |= nic_active_bit; nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); - macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; macaddr_high += (u16)netdev->dev_addr[1]; - macaddr_low = ((u32)netdev->dev_addr[2]) << 24; - macaddr_low += ((u32)netdev->dev_addr[3]) << 16; - macaddr_low += ((u32)netdev->dev_addr[4]) << 8; - macaddr_low += (u32)netdev->dev_addr[5]; + + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; /* Program the various MAC regs */ for (i = 0; i < NES_MAX_PORT_COUNT; i++) { @@ -451,7 +452,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) __le16 *wqe_fragment_length; u32 nr_frags; u32 original_first_length; -// u64 *wqe_fragment_address; + /* u64 *wqe_fragment_address; */ /* first fragment (0) is used by copy buffer */ u16 wqe_fragment_index=1; u16 hoffset; @@ -461,11 +462,12 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 old_head; u32 wqe_misc; - /* nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," - " (%u frags), tso_size=%u\n", - netdev->name, skb->len, skb_headlen(skb), - skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); - */ + /* + * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," + * " (%u frags), tso_size=%u\n", + * netdev->name, skb->len, skb_headlen(skb), + * skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + */ if (!netif_carrier_ok(netdev)) return NETDEV_TX_OK; @@ -787,22 +789,20 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p) int i; u32 macaddr_low; u16 macaddr_high; + DECLARE_MAC_BUF(mac); if (!is_valid_ether_addr(mac_addr->sa_data)) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len); - printk(PFX "%s: Address length = %d, Address = %02X%02X%02X%02X%02X%02X..\n", - __func__, netdev->addr_len, - mac_addr->sa_data[0], mac_addr->sa_data[1], - mac_addr->sa_data[2], mac_addr->sa_data[3], - mac_addr->sa_data[4], mac_addr->sa_data[5]); - macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + printk(PFX "%s: Address length = %d, Address = %s\n", + __func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data)); + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; macaddr_high += (u16)netdev->dev_addr[1]; - macaddr_low = ((u32)netdev->dev_addr[2]) << 24; - macaddr_low += ((u32)netdev->dev_addr[3]) << 16; - macaddr_low += 
((u32)netdev->dev_addr[4]) << 8; - macaddr_low += (u32)netdev->dev_addr[5]; + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; for (i = 0; i < NES_MAX_PORT_COUNT; i++) { if (nesvnic->qp_nic_index[i] == 0xf) { @@ -878,17 +878,17 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) if (mc_nic_index < 0) mc_nic_index = nesvnic->nic_index; if (multicast_addr) { - nes_debug(NES_DBG_NIC_RX, "Assigning MC Address = %02X%02X%02X%02X%02X%02X to register 0x%04X nic_idx=%d\n", - multicast_addr->dmi_addr[0], multicast_addr->dmi_addr[1], - multicast_addr->dmi_addr[2], multicast_addr->dmi_addr[3], - multicast_addr->dmi_addr[4], multicast_addr->dmi_addr[5], - perfect_filter_register_address+(mc_index * 8), mc_nic_index); - macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8; + DECLARE_MAC_BUF(mac); + nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n", + print_mac(mac, multicast_addr->dmi_addr), + perfect_filter_register_address+(mc_index * 8), + mc_nic_index); + macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8; macaddr_high += (u16)multicast_addr->dmi_addr[1]; - macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24; - macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16; - macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8; - macaddr_low += (u32)multicast_addr->dmi_addr[5]; + macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24; + macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16; + macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8; + macaddr_low += (u32)multicast_addr->dmi_addr[5]; nes_write_indexed(nesdev, perfect_filter_register_address+(mc_index * 8), macaddr_low); @@ -912,23 +912,23 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) /** * nes_netdev_change_mtu */ -static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) +static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) { struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; - int ret = 0; - u8 jumbomode=0; + struct nes_device *nesdev = nesvnic->nesdev; + int ret = 0; + u8 jumbomode = 0; - if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) + if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) return -EINVAL; - netdev->mtu = new_mtu; + netdev->mtu = new_mtu; nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; if (netdev->mtu > 1500) { jumbomode=1; } - nes_nic_init_timer_defaults(nesdev, jumbomode); + nes_nic_init_timer_defaults(nesdev, jumbomode); if (netif_running(netdev)) { nes_netdev_stop(netdev); @@ -938,8 +938,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) return ret; } -#define NES_ETHTOOL_STAT_COUNT 55 -static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = { +static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Link Change Interrupts", "Linearized SKBs", "T/GSO Requests", @@ -995,8 +994,12 @@ static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] "CQ Depth 32", "CQ Depth 128", "CQ Depth 256", + "LRO aggregated", + "LRO flushed", + "LRO no_desc", }; +#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset) /** * nes_netdev_get_rx_csum @@ -1191,6 +1194,9 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[52] = int_mod_cq_depth_32; target_stat_values[53] = int_mod_cq_depth_128; 
target_stat_values[54] = int_mod_cq_depth_256; + target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated; + target_stat_values[56] = nesvnic->lro_mgr.stats.flushed; + target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc; } @@ -1221,14 +1227,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *et_coalesce) { struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; + struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; unsigned long flags; - spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); if (et_coalesce->rx_max_coalesced_frames_low) { - shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; + shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; } if (et_coalesce->rx_max_coalesced_frames_irq) { shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq; @@ -1248,14 +1254,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev, nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq; if (et_coalesce->use_adaptive_rx_coalesce) { nesadapter->et_use_adaptive_rx_coalesce = 1; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; nesadapter->et_rx_coalesce_usecs_irq = 0; if (et_coalesce->pkt_rate_low) { - nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; + nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; } } else { nesadapter->et_use_adaptive_rx_coalesce = 0; - nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; if (nesadapter->et_rx_coalesce_usecs_irq) { nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8))); @@ -1272,28 +1278,28 @@ static int nes_netdev_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *et_coalesce) { struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; + struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct ethtool_coalesce temp_et_coalesce; struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; unsigned long flags; memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce)); - temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; - temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; - temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; + temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; + temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; + temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low; spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; - temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; + temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; + temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high; - temp_et_coalesce.rx_coalesce_usecs_low = 
shared_timer->timer_in_use_min; + temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max; if (nesadapter->et_use_adaptive_rx_coalesce) { temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use; } spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); - memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); + memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); return 0; } @@ -1372,30 +1378,38 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd u16 phy_data; et_cmd->duplex = DUPLEX_FULL; - et_cmd->port = PORT_MII; + et_cmd->port = PORT_MII; + if (nesadapter->OneG_Mode) { - et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg; - et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg; et_cmd->speed = SPEED_1000; - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], - &phy_data); - if (phy_data&0x1000) { - et_cmd->autoneg = AUTONEG_ENABLE; + if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { + et_cmd->supported = SUPPORTED_1000baseT_Full; + et_cmd->advertising = ADVERTISED_1000baseT_Full; + et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->phy_address = nesdev->mac_index; } else { - et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg; + et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg; + nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data); + if (phy_data & 0x1000) + et_cmd->autoneg = AUTONEG_ENABLE; + else + et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; } - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; } else { - if (nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) || + (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) { et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->port = PORT_FIBRE; - et_cmd->supported = SUPPORTED_FIBRE; + et_cmd->port = PORT_FIBRE; + et_cmd->supported = SUPPORTED_FIBRE; et_cmd->advertising = ADVERTISED_FIBRE; et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; } else { et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->supported = SUPPORTED_10000baseT_Full; + et_cmd->supported = SUPPORTED_10000baseT_Full; et_cmd->advertising = ADVERTISED_10000baseT_Full; et_cmd->phy_address = nesdev->mac_index; } @@ -1418,14 +1432,15 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd struct nes_adapter *nesadapter = nesdev->nesadapter; u16 phy_data; - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) { nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data); if (et_cmd->autoneg) { /* Turn on Full duplex, Autoneg, and restart autonegotiation */ phy_data |= 0x1300; } else { - // Turn off autoneg + /* Turn off autoneg */ phy_data &= ~0x1000; } nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], @@ -1456,6 +1471,8 @@ static struct ethtool_ops nes_ethtool_ops = { .set_sg = ethtool_op_set_sg, .get_tso = ethtool_op_get_tso, .set_tso = ethtool_op_set_tso, + .get_flags = ethtool_op_get_flags, + .set_flags = 
ethtool_op_set_flags, }; @@ -1609,27 +1626,34 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); if ((nesdev->netdev_count == 0) && - (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) { - nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", - NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1))); + ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || + ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) && + (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || + ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { + /* + * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", + * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1))); + */ u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200*(nesvnic->logical_port&1))); - u32temp |= 0x00200000; - nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200*(nesvnic->logical_port&1)), u32temp); + (0x200 * (nesdev->mac_index & 1))); + if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) { + u32temp |= 0x00200000; + nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1)), u32temp); + } + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200*(nesvnic->logical_port&1)) ); + (0x200 * (nesdev->mac_index & 1))); + if ((u32temp&0x0f1f0000) == 0x0f0f0000) { - if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) { nes_init_phy(nesdev); - nes_read_10G_phy_reg(nesdev, 1, - nesdev->nesadapter->phy_index[nesvnic->logical_port]); + nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); u32temp = 20; do { - nes_read_10G_phy_reg(nesdev, 1, - nesdev->nesadapter->phy_index[nesvnic->logical_port]); + nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); if ((phy_data == temp_phy_data) || (!(--u32temp))) @@ -1646,6 +1670,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); nesvnic->linkup = 1; } + } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { + nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n", + nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn)); + if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) || + ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } } /* clear the MAC interrupt status, assumes direct logical to physical mapping */ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index f9db07c2717..fe83d1b2b17 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -444,15 +444,13 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 /** * nes_write_10G_phy_reg */ -void 
nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, - u8 phy_addr, u16 data) +void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg, + u16 data) { - u32 dev_addr; u32 port_addr; u32 u32temp; u32 counter; - dev_addr = 1; port_addr = phy_addr; /* set address */ @@ -492,14 +490,12 @@ void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, * This routine only issues the read, the data must be read * separately. */ -void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr) +void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg) { - u32 dev_addr; u32 port_addr; u32 u32temp; u32 counter; - dev_addr = 1; port_addr = phy_addr; /* set address */ @@ -660,7 +656,9 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti /* DELETE or RESOLVE */ if (arp_index == nesadapter->arp_table_size) { - nes_debug(NES_DBG_NETDEV, "mac address not in ARP table - cannot delete or resolve\n"); + nes_debug(NES_DBG_NETDEV, "MAC for " NIPQUAD_FMT " not in ARP table - cannot %s\n", + HIPQUAD(ip_addr), + action == NES_ARP_RESOLVE ? "resolve" : "delete"); return -1; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index f9a5d439089..99b3c4ae86e 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1266,7 +1266,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, sq_size = init_attr->cap.max_send_wr; rq_size = init_attr->cap.max_recv_wr; - // check if the encoded sizes are OK or not... + /* check if the encoded sizes are OK or not... */ sq_encoded_size = nes_get_encoded_size(&sq_size); rq_encoded_size = nes_get_encoded_size(&rq_size); @@ -1976,7 +1976,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) if (nescq->cq_mem_size) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, - (void *)nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase); + nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase); kfree(nescq); return ret; @@ -2377,7 +2377,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u8 single_page = 1; u8 stag_key; - region = ib_umem_get(pd->uobject->context, start, length, acc); + region = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(region)) { return (struct ib_mr *)region; } @@ -3610,6 +3610,12 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) while (cqe_count < num_entries) { if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { + /* + * Make sure we read CQ entry contents *after* + * we've checked the valid bit. 
+ */ + rmb(); + cqe = nescq->hw_cq.cq_vbase[head]; nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 73b2b176ad0..ca126fc2b85 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -56,11 +56,11 @@ /* constants */ enum { - IPOIB_PACKET_SIZE = 2048, - IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, - IPOIB_ENCAP_LEN = 4, + IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN, + IPOIB_UD_RX_SG = 2, /* max buffer needed for 4K mtu */ + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, @@ -95,6 +95,8 @@ enum { IPOIB_MCAST_FLAG_SENDONLY = 1, IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + + MAX_SEND_CQE = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -139,7 +141,7 @@ struct ipoib_mcast { struct ipoib_rx_buf { struct sk_buff *skb; - u64 mapping; + u64 mapping[IPOIB_UD_RX_SG]; }; struct ipoib_tx_buf { @@ -285,7 +287,8 @@ struct ipoib_dev_priv { u16 pkey_index; struct ib_pd *pd; struct ib_mr *mr; - struct ib_cq *cq; + struct ib_cq *recv_cq; + struct ib_cq *send_cq; struct ib_qp *qp; u32 qkey; @@ -294,6 +297,7 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; + unsigned int max_ib_mtu; struct ipoib_rx_buf *rx_ring; @@ -304,6 +308,10 @@ struct ipoib_dev_priv { struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_send_wr tx_wr; unsigned tx_outstanding; + struct ib_wc send_wc[MAX_SEND_CQE]; + + struct ib_recv_wr rx_wr; + struct ib_sge rx_sge[IPOIB_UD_RX_SG]; struct ib_wc ibwc[IPOIB_NUM_WC]; @@ -326,6 +334,7 @@ struct ipoib_dev_priv { #endif int hca_caps; struct ipoib_ethtool_st ethtool; + struct timer_list poll_timer; }; struct ipoib_ah { @@ -366,6 +375,14 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES) + +static inline int ipoib_ud_need_sg(unsigned int ib_mtu) +{ + return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE; +} + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. The ALIGN() expression here makes @@ -388,6 +405,7 @@ extern struct workqueue_struct *ipoib_workqueue; int ipoib_poll(struct napi_struct *napi, int budget); void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); +void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct ib_ah_attr *attr); @@ -650,7 +668,6 @@ static inline int ipoib_register_debugfs(void) { return 0; } static inline void ipoib_unregister_debugfs(void) { } #endif - #define ipoib_printk(level, priv, format, arg...) \ printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) #define ipoib_warn(priv, format, arg...) 
\ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 9db7b0bd913..97e67d36378 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { .event_handler = ipoib_cm_rx_event_handler, - .send_cq = priv->cq, /* For drain WR */ - .recv_cq = priv->cq, + .send_cq = priv->recv_cq, /* For drain WR */ + .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = 1, /* For drain WR */ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ @@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { - .send_cq = priv->cq, - .recv_cq = priv->cq, + .send_cq = priv->recv_cq, + .recv_cq = priv->recv_cq, .srq = priv->cm.srq, .cap.max_send_wr = ipoib_sendq_size, .cap.max_send_sge = 1, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 9a47428366c..10279b79c44 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev, coal->rx_max_coalesced_frames > 0xffff) return -EINVAL; - ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames, + ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret && ret != -ENOSYS) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 0205eb7c1bd..f429bce24c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -89,28 +89,59 @@ void ipoib_free_ah(struct kref *kref) spin_unlock_irqrestore(&priv->lock, flags); } +static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_UD_RX_SG]) +{ + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, + DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE, + DMA_FROM_DEVICE); + } else + ib_dma_unmap_single(priv->ca, mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); +} + +static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv, + struct sk_buff *skb, + unsigned int length) +{ + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; + unsigned int size; + /* + * There is only two buffers needed for max_payload = 4K, + * first buf size is IPOIB_UD_HEAD_SIZE + */ + skb->tail += IPOIB_UD_HEAD_SIZE; + skb->len += length; + + size = length - IPOIB_UD_HEAD_SIZE; + + frag->size = size; + skb->data_len += size; + skb->truesize += size; + } else + skb_put(skb, length); + +} + static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; + priv->rx_wr.wr_id = id | IPOIB_OP_RECV; + priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0]; + priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; - ret = ib_post_recv(priv->qp, 
&param, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } @@ -118,15 +149,21 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) return ret; } -static int ipoib_alloc_rx_skb(struct net_device *dev, int id) +static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; - u64 addr; + int buf_size; + u64 *mapping; - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) - return -ENOMEM; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + buf_size = IPOIB_UD_HEAD_SIZE; + else + buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + + skb = dev_alloc_skb(buf_size + 4); + if (unlikely(!skb)) + return NULL; /* * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte @@ -135,17 +172,32 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { - dev_kfree_skb_any(skb); - return -EIO; + mapping = priv->rx_ring[id].mapping; + mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) + goto error; + + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + struct page *page = alloc_page(GFP_ATOMIC); + if (!page) + goto partial_error; + skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE); + mapping[1] = + ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page, + 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1]))) + goto partial_error; } - priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; + priv->rx_ring[id].skb = skb; + return skb; - return 0; +partial_error: + ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE); +error: + dev_kfree_skb_any(skb); + return NULL; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -154,7 +206,7 @@ static int ipoib_ib_post_receives(struct net_device *dev) int i; for (i = 0; i < ipoib_recvq_size; ++i) { - if (ipoib_alloc_rx_skb(dev, i)) { + if (!ipoib_alloc_rx_skb(dev, i)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); return -ENOMEM; } @@ -172,7 +224,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; struct sk_buff *skb; - u64 addr; + u64 mapping[IPOIB_UD_RX_SG]; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -184,15 +236,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -205,11 +255,14 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->slid 
== priv->local_lid && wc->src_qp == priv->qp->qp_num) goto repost; + memcpy(mapping, priv->rx_ring[wr_id].mapping, + IPOIB_UD_RX_SG * sizeof *mapping); + /* * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. */ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + if (unlikely(!ipoib_alloc_rx_skb(dev, wr_id))) { ++dev->stats.rx_dropped; goto repost; } @@ -217,9 +270,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_ud_dma_unmap_rx(priv, mapping); + ipoib_ud_skb_put_frags(priv, skb, wc->byte_len); - skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -311,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id; struct ipoib_tx_buf *tx_req; - unsigned long flags; ipoib_dbg_data(priv, "send completion: id %d, status: %d\n", wr_id, wc->status); @@ -331,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); ++priv->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) @@ -346,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) wc->status, wr_id, wc->vendor_err); } +static int poll_tx(struct ipoib_dev_priv *priv) +{ + int n, i; + + n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc); + for (i = 0; i < n; ++i) + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); + + return n == MAX_SEND_CQE; +} + int ipoib_poll(struct napi_struct *napi, int budget) { struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); @@ -361,7 +422,7 @@ poll_more: int max = (budget - done); t = min(IPOIB_NUM_WC, max); - n = ib_poll_cq(priv->cq, t, priv->ibwc); + n = ib_poll_cq(priv->recv_cq, t, priv->ibwc); for (i = 0; i < n; i++) { struct ib_wc *wc = priv->ibwc + i; @@ -372,12 +433,8 @@ poll_more: ipoib_cm_handle_rx_wc(dev, wc); else ipoib_ib_handle_rx_wc(dev, wc); - } else { - if (wc->wr_id & IPOIB_OP_CM) - ipoib_cm_handle_tx_wc(dev, wc); - else - ipoib_ib_handle_tx_wc(dev, wc); - } + } else + ipoib_cm_handle_tx_wc(priv->dev, wc); } if (n != t) @@ -386,7 +443,7 @@ poll_more: if (done < budget) { netif_rx_complete(dev, napi); - if (unlikely(ib_req_notify_cq(priv->cq, + if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && netif_rx_reschedule(dev, napi)) @@ -404,6 +461,26 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) netif_rx_schedule(dev, &priv->napi); } +static void drain_tx_cq(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned long flags; + + spin_lock_irqsave(&priv->tx_lock, flags); + while (poll_tx(priv)) + ; /* nothing */ + + if (netif_queue_stopped(dev)) + mod_timer(&priv->poll_timer, jiffies + 1); + + spin_unlock_irqrestore(&priv->tx_lock, flags); +} + +void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr) +{ + drain_tx_cq((struct net_device *)dev_ptr); +} + static inline int post_send(struct ipoib_dev_priv *priv, 
unsigned int wr_id, struct ib_ah *address, u32 qpn, @@ -498,23 +575,34 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, else priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; + if (++priv->tx_outstanding == ipoib_sendq_size) { + ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) + ipoib_warn(priv, "request notify on send CQ failed\n"); + netif_stop_queue(dev); + } + if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), address->ah, qpn, tx_req, phead, hlen))) { ipoib_warn(priv, "post_send failed\n"); ++dev->stats.tx_errors; + --priv->tx_outstanding; ipoib_dma_unmap_tx(priv->ca, tx_req); dev_kfree_skb_any(skb); + if (netif_queue_stopped(dev)) + netif_wake_queue(dev); } else { dev->trans_start = jiffies; address->last_send = priv->tx_head; ++priv->tx_head; + skb_orphan(skb); - if (++priv->tx_outstanding == ipoib_sendq_size) { - ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); - netif_stop_queue(dev); - } } + + if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) + while (poll_tx(priv)) + ; /* nothing */ } static void __ipoib_reap_ah(struct net_device *dev) @@ -548,6 +636,11 @@ void ipoib_reap_ah(struct work_struct *work) round_jiffies_relative(HZ)); } +static void ipoib_ib_tx_timer_func(unsigned long ctx) +{ + drain_tx_cq((struct net_device *)ctx); +} + int ipoib_ib_dev_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -584,6 +677,10 @@ int ipoib_ib_dev_open(struct net_device *dev) queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, round_jiffies_relative(HZ)); + init_timer(&priv->poll_timer); + priv->poll_timer.function = ipoib_ib_tx_timer_func; + priv->poll_timer.data = (unsigned long)dev; + set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); return 0; @@ -661,7 +758,7 @@ void ipoib_drain_cq(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int i, n; do { - n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc); + n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { /* * Convert any successful completions to flush @@ -676,14 +773,13 @@ void ipoib_drain_cq(struct net_device *dev) ipoib_cm_handle_rx_wc(dev, priv->ibwc + i); else ipoib_ib_handle_rx_wc(dev, priv->ibwc + i); - } else { - if (priv->ibwc[i].wr_id & IPOIB_OP_CM) - ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); - else - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); - } + } else + ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); } } while (n == IPOIB_NUM_WC); + + while (poll_tx(priv)) + ; /* nothing */ } int ipoib_ib_dev_stop(struct net_device *dev, int flush) @@ -733,10 +829,8 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) rx_req = &priv->rx_ring[i]; if (!rx_req->skb) continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + ipoib_ud_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); dev_kfree_skb_any(rx_req->skb); rx_req->skb = NULL; } @@ -752,6 +846,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) ipoib_dbg(priv, "All sends and receives done.\n"); timeout: + del_timer_sync(&priv->poll_timer); qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); @@ -775,7 +870,7 @@ timeout: msleep(1); } - ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 
bd07f02cf02..2442090ac8d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -195,7 +195,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -971,10 +971,6 @@ static void ipoib_setup(struct net_device *dev) NETIF_F_LLTX | NETIF_F_HIGHDMA); - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1107,6 +1103,7 @@ static struct net_device *ipoib_add_port(const char *format, { struct ipoib_dev_priv *priv; struct ib_device_attr *device_attr; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1115,6 +1112,18 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", @@ -1289,7 +1298,8 @@ static int __init ipoib_init_module(void) ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); - ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); + ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE, + IPOIB_MIN_QUEUE_SIZE)); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); #endif diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 31a53c5bcb1..d00a2c174ae 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 8a20e3742c4..8766d29ce3b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -150,7 +150,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD @@ -171,26 +171,34 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) goto out_free_pd; } - size = ipoib_sendq_size + ipoib_recvq_size + 1; + size = ipoib_recvq_size + 1; ret = ipoib_cm_dev_init(dev); if (!ret) { + size += ipoib_sendq_size; if (ipoib_cm_has_srq(dev)) size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */ else size += ipoib_recvq_size * ipoib_max_conn_qp; } - 
priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); - if (IS_ERR(priv->cq)) { - printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); + priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); + if (IS_ERR(priv->recv_cq)) { + printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_free_mr; } - if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) - goto out_free_cq; + priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, + dev, ipoib_sendq_size, 0); + if (IS_ERR(priv->send_cq)) { + printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); + goto out_free_recv_cq; + } + + if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP)) + goto out_free_send_cq; - init_attr.send_cq = priv->cq; - init_attr.recv_cq = priv->cq; + init_attr.send_cq = priv->send_cq; + init_attr.recv_cq = priv->recv_cq; if (priv->hca_caps & IB_DEVICE_UD_TSO) init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO; @@ -201,7 +209,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_cq; + goto out_free_send_cq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -215,10 +223,26 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_sge[0].lkey = priv->mr->lkey; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE; + priv->rx_sge[1].length = PAGE_SIZE; + priv->rx_sge[1].lkey = priv->mr->lkey; + priv->rx_wr.num_sge = IPOIB_UD_RX_SG; + } else { + priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + priv->rx_wr.num_sge = 1; + } + priv->rx_wr.next = NULL; + priv->rx_wr.sg_list = priv->rx_sge; + return 0; -out_free_cq: - ib_destroy_cq(priv->cq); +out_free_send_cq: + ib_destroy_cq(priv->send_cq); + +out_free_recv_cq: + ib_destroy_cq(priv->recv_cq); out_free_mr: ib_dereg_mr(priv->mr); @@ -241,8 +265,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_destroy_cq(priv->cq)) - ipoib_warn(priv, "ib_cq_destroy failed\n"); + if (ib_destroy_cq(priv->send_cq)) + ipoib_warn(priv, "ib_cq_destroy (send) failed\n"); + + if (ib_destroy_cq(priv->recv_cq)) + ipoib_warn(priv, "ib_cq_destroy (recv) failed\n"); ipoib_cm_dev_cleanup(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 293f5b892e3..1cdb5cfb0ff 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -89,6 +89,10 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) goto err; } + priv->max_ib_mtu = ppriv->max_ib_mtu; + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); priv->pkey = pkey; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index be1b9fbd416..aeb58cae9a3 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -473,13 +473,15 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; stats->tmfrsp_pdus = 
conn->tmfrsp_pdus_cnt; - stats->custom_length = 3; + stats->custom_length = 4; strcpy(stats->custom[0].desc, "qp_tx_queue_full"); stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */ strcpy(stats->custom[1].desc, "fmr_map_not_avail"); stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */; strcpy(stats->custom[2].desc, "eh_abort_cnt"); stats->custom[2].value = conn->eh_abort_cnt; + strcpy(stats->custom[3].desc, "fmr_unalign_cnt"); + stats->custom[3].value = conn->fmr_unalign_cnt; } static int diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 1ee867b1b34..a8c1b300e34 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -71,6 +71,13 @@ #define iser_dbg(fmt, arg...) \ do { \ + if (iser_debug_level > 1) \ + printk(KERN_DEBUG PFX "%s:" fmt,\ + __func__ , ## arg); \ + } while (0) + +#define iser_warn(fmt, arg...) \ + do { \ if (iser_debug_level > 0) \ printk(KERN_DEBUG PFX "%s:" fmt,\ __func__ , ## arg); \ } while (0) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 4a17743a639..cac50c4dc15 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -334,8 +334,11 @@ static void iser_data_buf_dump(struct iser_data_buf *data, struct scatterlist *sg; int i; + if (iser_debug_level == 0) + return; + for_each_sg(sgl, sg, data->dma_nents, i) - iser_err("sg[%d] dma_addr:0x%lX page:0x%p " + iser_warn("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", i, (unsigned long)ib_sg_dma_address(ibdev, sg), sg_page(sg), sg->offset, @@ -420,6 +423,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, enum iser_data_dir cmd_dir) { + struct iscsi_conn *iscsi_conn = iser_ctask->iser_conn->iscsi_conn; struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; @@ -434,7 +438,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { - iser_err("rdma alignment violation %d/%d aligned\n", + iscsi_conn->fmr_unalign_cnt++; + iser_warn("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem, ibdev);
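/*
 * Editor's aside -- an illustrative sketch, not part of any patch above.
 * The nes_poll_cq hunk adds an rmb() between checking a CQE's valid bit
 * and copying the entry, so the CPU cannot reorder the loads and observe
 * CQE fields the adapter is still writing via DMA. The same pattern in
 * miniature, against a hypothetical little-endian descriptor layout:
 */
#include <linux/types.h>	/* u32, __le32 */
#include <asm/byteorder.h>	/* le32_to_cpu() */
#include <asm/system.h>		/* rmb() in kernels of this era */

#define DESC_VALID (1u << 31)	/* hypothetical "entry complete" flag */

struct rx_desc {		/* hypothetical DMA-written descriptor */
	__le32 flags;
	__le32 len;
};

/* Returns 1 and fills *len only once the descriptor is complete. */
static int desc_ready(const volatile struct rx_desc *d, u32 *len)
{
	if (!(le32_to_cpu(d->flags) & DESC_VALID))
		return 0;
	rmb();	/* read the valid bit before any other descriptor field */
	*len = le32_to_cpu(d->len);
	return 1;
}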
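/*
 * Editor's aside -- an illustrative sketch, not part of any patch above.
 * The nes_nic.c ethtool hunk drops the hardcoded NES_ETHTOOL_STAT_COUNT
 * of 55 and derives it with ARRAY_SIZE(nes_ethtool_stringset), so the
 * three new LRO strings cannot leave the advertised stat count out of
 * sync with the string table. The idiom in isolation (names made up):
 */
#include <linux/kernel.h>	/* ARRAY_SIZE() */

static const char demo_stat_names[][32] = {
	"rx_packets",
	"tx_packets",
	"lro_aggregated",	/* adding a row updates the count for free */
};

/* Always equal to the number of rows above; no manual bookkeeping. */
#define DEMO_STAT_COUNT ARRAY_SIZE(demo_stat_names)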