diff options
Diffstat (limited to 'drivers/infiniband/hw')
41 files changed, 1536 insertions, 552 deletions
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild index 06964c4af84..950dfabcd89 100644 --- a/drivers/infiniband/hw/amso1100/Kbuild +++ b/drivers/infiniband/hw/amso1100/Kbuild @@ -1,6 +1,4 @@ -ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c index 3b5095470cb..0ebe4e806b8 100644 --- a/drivers/infiniband/hw/amso1100/c2_intr.c +++ b/drivers/infiniband/hw/amso1100/c2_intr.c @@ -62,8 +62,8 @@ void c2_rnic_interrupt(struct c2_dev *c2dev) static void handle_mq(struct c2_dev *c2dev, u32 mq_index) { if (c2dev->qptr_array[mq_index] == NULL) { - pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n", - mq_index); + pr_debug("handle_mq: stray activity for mq_index=%d\n", + mq_index); return; } diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 7e7b5a66f04..621619c794e 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -1,10 +1,8 @@ -EXTRA_CFLAGS += -Idrivers/net/cxgb3 +ccflags-y := -Idrivers/net/cxgb3 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ iwch_provider.o iwch.o cxio_hal.o cxio_resource.o -ifdef CONFIG_INFINIBAND_CXGB3_DEBUG -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 005b7b52bc1..09dda0b8740 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -160,6 +160,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) struct rdma_cq_setup setup; int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); + size += 1; /* one extra page for storing cq-in-err state */ cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); if (!cq->cqid) return -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index e5ddb63e7d2..4bb997aa39d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -728,6 +728,22 @@ struct t3_cq { #define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \ CQE_GENBIT(*cqe)) +struct t3_cq_status_page { + u32 cq_err; +}; + +static inline int cxio_cq_in_error(struct t3_cq *cq) +{ + return ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err; +} + +static inline void cxio_set_cq_in_error(struct t3_cq *cq) +{ + ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err = 1; +} + static inline void cxio_set_wq_in_error(struct t3_wq *wq) { wq->queue->wq_in_err.err |= 1; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 13c88871dc3..d02dcc6e596 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1093,8 +1093,8 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p credits %u\n", __func__, ep, credits); if (credits == 0) { - PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); + PDBG("%s 0 credit ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); return CPL_RET_BUF_DONE; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 6afc89e7572..71e0d845da3 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -76,6 +76,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, atomic_inc(&qhp->refcnt); spin_unlock(&rnicp->lock); + if (qhp->attr.state == IWCH_QP_STATE_RTS) { + attrs.next_state = IWCH_QP_STATE_TERMINATE; + iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (send_term) + iwch_post_terminate(qhp, rsp_msg); + } + event.event = ib_event; event.device = chp->ibcq.device; if (ib_event == IB_EVENT_CQ_ERR) @@ -86,13 +94,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); @@ -179,7 +181,6 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) case TPT_ERR_BOUND: case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); break; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index fca0b4b747e..2e2741307af 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -154,6 +154,8 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; struct iwch_ucontext *ucontext = NULL; + static int warned; + size_t resplen; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); rhp = to_iwch_dev(ibdev); @@ -217,15 +219,26 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve uresp.key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + mm->key = uresp.key; + mm->addr = virt_to_phys(chp->cq.queue); + if (udata->outlen < sizeof uresp) { + if (!warned++) + printk(KERN_WARNING MOD "Warning - " + "downlevel libcxgb3 (non-fatal).\n"); + mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * + sizeof(struct t3_cqe)); + resplen = sizeof(struct iwch_create_cq_resp_v0); + } else { + mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * + sizeof(struct t3_cqe)); + uresp.memsize = mm->len; + resplen = sizeof uresp; + } + if (ib_copy_to_udata(udata, &uresp, resplen)) { kfree(mm); iwch_destroy_cq(&chp->ibcq); return ERR_PTR(-EFAULT); } - mm->key = uresp.key; - mm->addr = virt_to_phys(chp->cq.queue); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof (struct t3_cqe)); insert_mmap(ucontext, mm); } PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n", @@ -1414,6 +1427,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; dev->ibdev.get_protocol_stats = iwch_get_mib; + dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index c64d27bf2c1..0993137181d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -802,14 +802,12 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) /* * Assumes qhp lock is held. */ -static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, + struct iwch_cq *schp, unsigned long *flag) { - struct iwch_cq *rchp, *schp; int count; int flushed; - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); /* take a ref on the qhp since we must release the lock */ @@ -847,10 +845,23 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) { - if (qhp->ibqp.uobject) + struct iwch_cq *rchp, *schp; + + rchp = get_chp(qhp->rhp, qhp->attr.rcq); + schp = get_chp(qhp->rhp, qhp->attr.scq); + + if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); - else - __flush_qp(qhp, flag); + cxio_set_cq_in_error(&rchp->cq); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + if (schp != rchp) { + cxio_set_cq_in_error(&schp->cq); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + } + return; + } + __flush_qp(qhp, rchp, schp, flag); } diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h index cb7086f558c..a277c31fcaf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/drivers/infiniband/hw/cxgb3/iwch_user.h @@ -45,10 +45,18 @@ struct iwch_create_cq_req { __u64 user_rptr_addr; }; +struct iwch_create_cq_resp_v0 { + __u64 key; + __u32 cqid; + __u32 size_log2; +}; + struct iwch_create_cq_resp { __u64 key; __u32 cqid; __u32 size_log2; + __u32 memsize; + __u32 reserved; }; struct iwch_create_qp_resp { diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index e31a499f017..cd20b1342ae 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -Idrivers/net/cxgb4 +ccflags-y := -Idrivers/net/cxgb4 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 32d352a88d5..0dc62b1438b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -117,9 +117,9 @@ static int rcv_win = 256 * 1024; module_param(rcv_win, int, 0644); MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)"); -static int snd_win = 32 * 1024; +static int snd_win = 128 * 1024; module_param(snd_win, int, 0644); -MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); +MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)"); static struct workqueue_struct *workq; @@ -172,7 +172,7 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) @@ -187,7 +187,7 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) @@ -219,12 +219,11 @@ static void set_emss(struct c4iw_ep *ep, u16 opt) static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) { - unsigned long flags; enum c4iw_ep_state state; - spin_lock_irqsave(&epc->lock, flags); + mutex_lock(&epc->mutex); state = epc->state; - spin_unlock_irqrestore(&epc->lock, flags); + mutex_unlock(&epc->mutex); return state; } @@ -235,12 +234,10 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) { - unsigned long flags; - - spin_lock_irqsave(&epc->lock, flags); + mutex_lock(&epc->mutex); PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]); __state_set(epc, new); - spin_unlock_irqrestore(&epc->lock, flags); + mutex_unlock(&epc->mutex); return; } @@ -251,8 +248,8 @@ static void *alloc_ep(int size, gfp_t gfp) epc = kzalloc(size, gfp); if (epc) { kref_init(&epc->kref); - spin_lock_init(&epc->lock); - init_waitqueue_head(&epc->waitq); + mutex_init(&epc->mutex); + c4iw_init_wr_wait(&epc->wr_wait); } PDBG("%s alloc ep %p\n", __func__, epc); return epc; @@ -1131,7 +1128,6 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; struct cpl_abort_rpl_rss *rpl = cplhdr(skb); - unsigned long flags; int release = 0; unsigned int tid = GET_TID(rpl); struct tid_info *t = dev->rdev.lldi.tids; @@ -1139,7 +1135,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(!ep); - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: __state_set(&ep->com, DEAD); @@ -1150,7 +1146,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) __func__, ep, ep->com.state); break; } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); if (release) release_ep_resources(ep); @@ -1213,9 +1209,9 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) } PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); + ep->com.wr_wait.ret = status2errno(rpl->status); + ep->com.wr_wait.done = 1; + wake_up(&ep->com.wr_wait.wait); return 0; } @@ -1249,9 +1245,9 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = lookup_stid(t, stid); PDBG("%s ep %p\n", __func__, ep); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); + ep->com.wr_wait.ret = status2errno(rpl->status); + ep->com.wr_wait.done = 1; + wake_up(&ep->com.wr_wait.wait); return 0; } @@ -1478,7 +1474,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_peer_close *hdr = cplhdr(skb); struct c4iw_ep *ep; struct c4iw_qp_attributes attrs; - unsigned long flags; int disconnect = 1; int release = 0; int closing = 0; @@ -1489,7 +1484,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: __state_set(&ep->com, CLOSING); @@ -1507,17 +1502,17 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) * in rdma connection migration (see c4iw_accept_cr()). */ __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; + ep->com.wr_wait.done = 1; + ep->com.wr_wait.ret = -ECONNRESET; PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); - wake_up(&ep->com.waitq); + wake_up(&ep->com.wr_wait.wait); break; case MPA_REP_SENT: __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; + ep->com.wr_wait.done = 1; + ep->com.wr_wait.ret = -ECONNRESET; PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); - wake_up(&ep->com.waitq); + wake_up(&ep->com.wr_wait.wait); break; case FPDU_MODE: start_ep_timer(ep); @@ -1550,7 +1545,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) default: BUG_ON(1); } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); if (closing) { attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, @@ -1581,7 +1576,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_qp_attributes attrs; int ret; int release = 0; - unsigned long flags; struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(req); @@ -1591,9 +1585,17 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) ep->hwtid); return 0; } - spin_lock_irqsave(&ep->com.lock, flags); PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + */ + ep->com.wr_wait.done = 1; + ep->com.wr_wait.ret = -ECONNRESET; + wake_up(&ep->com.wr_wait.wait); + + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case CONNECTING: break; @@ -1605,23 +1607,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REP_SENT: - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - PDBG("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); break; case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see c4iw_accept_cr()). - */ - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); - wake_up(&ep->com.waitq); break; case MORIBUND: case CLOSING: @@ -1644,7 +1631,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case DEAD: PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); return 0; default: BUG_ON(1); @@ -1655,7 +1642,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) __state_set(&ep->com, DEAD); release = 1; } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); if (!rpl_skb) { @@ -1681,7 +1668,6 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_ep *ep; struct c4iw_qp_attributes attrs; struct cpl_close_con_rpl *rpl = cplhdr(skb); - unsigned long flags; int release = 0; struct tid_info *t = dev->rdev.lldi.tids; unsigned int tid = GET_TID(rpl); @@ -1692,7 +1678,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) BUG_ON(!ep); /* The cm_id may be null if we failed to connect */ - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); switch (ep->com.state) { case CLOSING: __state_set(&ep->com, MORIBUND); @@ -1717,7 +1703,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) BUG_ON(1); break; } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); if (release) release_ep_resources(ep); return 0; @@ -1725,23 +1711,24 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) { - struct c4iw_ep *ep; - struct cpl_rdma_terminate *term = cplhdr(skb); + struct cpl_rdma_terminate *rpl = cplhdr(skb); struct tid_info *t = dev->rdev.lldi.tids; - unsigned int tid = GET_TID(term); + unsigned int tid = GET_TID(rpl); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; ep = lookup_tid(t, tid); + BUG_ON(!ep); - if (state_read(&ep->com) != FPDU_MODE) - return 0; + if (ep->com.qp) { + printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } else + printk(KERN_WARNING MOD "TERM received tid %u no qp\n", tid); - PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - skb_pull(skb, sizeof *term); - PDBG("%s saving %d bytes of term msg\n", __func__, skb->len); - skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, - skb->len); - ep->com.qp->attr.terminate_msg_len = skb->len; - ep->com.qp->attr.is_terminate_local = 0; return 0; } @@ -1762,8 +1749,8 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); if (credits == 0) { - PDBG(KERN_ERR "%s 0 credit ack ep %p tid %u state %u\n", - __func__, ep, ep->hwtid, state_read(&ep->com)); + PDBG("%s 0 credit ack ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, state_read(&ep->com)); return 0; } @@ -2042,6 +2029,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } state_set(&ep->com, LISTEN); + c4iw_init_wr_wait(&ep->com.wr_wait); err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, ep->com.local_addr.sin_addr.s_addr, ep->com.local_addr.sin_port, @@ -2050,15 +2038,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto fail3; /* wait for pass_open_rpl */ - wait_event_timeout(ep->com.waitq, ep->com.rpl_done, C4IW_WR_TO); - if (ep->com.rpl_done) - err = ep->com.rpl_err; - else { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(ep->com.dev->rdev.lldi.pdev)); - ep->com.dev->rdev.flags = T4_FATAL_ERROR; - err = -EIO; - } + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, + __func__); if (!err) { cm_id->provider_data = ep; goto out; @@ -2082,20 +2063,12 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; + c4iw_init_wr_wait(&ep->com.wr_wait); err = listen_stop(ep); if (err) goto done; - wait_event_timeout(ep->com.waitq, ep->com.rpl_done, C4IW_WR_TO); - if (ep->com.rpl_done) - err = ep->com.rpl_err; - else { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(ep->com.dev->rdev.lldi.pdev)); - ep->com.dev->rdev.flags = T4_FATAL_ERROR; - err = -EIO; - } + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, + __func__); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); done: cm_id->rem_ref(cm_id); @@ -2106,12 +2079,11 @@ done: int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) { int ret = 0; - unsigned long flags; int close = 0; int fatal = 0; struct c4iw_rdev *rdev; - spin_lock_irqsave(&ep->com.lock, flags); + mutex_lock(&ep->com.mutex); PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); @@ -2158,7 +2130,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) break; } - spin_unlock_irqrestore(&ep->com.lock, flags); + mutex_unlock(&ep->com.mutex); if (close) { if (abrupt) ret = abort_connection(ep, NULL, gfp); @@ -2172,6 +2144,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) return ret; } +static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + return 0; +} + /* * These are the real handlers that are called from a * work queue. @@ -2190,7 +2169,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_ABORT_REQ_RSS] = peer_abort, [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, - [CPL_FW4_ACK] = fw4_ack + [CPL_FW4_ACK] = fw4_ack, + [CPL_FW6_MSG] = async_event }; static void process_timeout(struct c4iw_ep *ep) @@ -2198,7 +2178,7 @@ static void process_timeout(struct c4iw_ep *ep) struct c4iw_qp_attributes attrs; int abort = 1; - spin_lock_irq(&ep->com.lock); + mutex_lock(&ep->com.mutex); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); switch (ep->com.state) { @@ -2225,7 +2205,7 @@ static void process_timeout(struct c4iw_ep *ep) WARN_ON(1); abort = 0; } - spin_unlock_irq(&ep->com.lock); + mutex_unlock(&ep->com.mutex); if (abort) abort_connection(ep, NULL, GFP_KERNEL); c4iw_put_ep(&ep->com); @@ -2309,6 +2289,7 @@ static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u " "for tid %u\n", rpl->status, GET_TID(rpl)); } + kfree_skb(skb); return 0; } @@ -2323,20 +2304,25 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) switch (rpl->type) { case 1: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); - wr_waitp = (__force struct c4iw_wr_wait *)rpl->data[1]; + wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); if (wr_waitp) { - wr_waitp->ret = ret; + if (ret) + wr_waitp->ret = -ret; + else + wr_waitp->ret = 0; wr_waitp->done = 1; wake_up(&wr_waitp->wait); } + kfree_skb(skb); break; case 2: - c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + sched(dev, skb); break; default: printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__, rpl->type); + kfree_skb(skb); break; } return 0; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b3daf39eed4..8d8f8add6fc 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -55,7 +55,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, V_FW_RI_RES_WR_NRES(1) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (u64)&wr_wait; + res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_RESET; @@ -64,14 +64,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, c4iw_init_wr_wait(&wr_wait); ret = c4iw_ofld_send(rdev, skb); if (!ret) { - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); } kfree(cq->sw_queue); @@ -132,7 +125,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, V_FW_RI_RES_WR_NRES(1) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (u64)&wr_wait; + res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_WRITE; @@ -157,14 +150,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, if (ret) goto err4; PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait); - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); if (ret) goto err4; @@ -476,6 +462,11 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, goto proc_cqe; } + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + /* * RECV completion. */ @@ -696,6 +687,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) case T4_ERR_MSN_RANGE: case T4_ERR_IRD_OVERFLOW: case T4_ERR_OPCODE: + case T4_ERR_INTERNAL_ERR: wc->status = IB_WC_FATAL_ERR; break; case T4_ERR_SWFLUSH: diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 9bbf491d5d9..54fbc1118ab 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -49,29 +49,33 @@ static DEFINE_MUTEX(dev_mutex); static struct dentry *c4iw_debugfs_root; -struct debugfs_qp_data { +struct c4iw_debugfs_data { struct c4iw_dev *devp; char *buf; int bufsize; int pos; }; -static int count_qps(int id, void *p, void *data) +static int count_idrs(int id, void *p, void *data) { - struct c4iw_qp *qp = p; int *countp = data; - if (id != qp->wq.sq.qid) - return 0; - *countp = *countp + 1; return 0; } -static int dump_qps(int id, void *p, void *data) +static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct c4iw_debugfs_data *d = file->private_data; + + return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos); +} + +static int dump_qp(int id, void *p, void *data) { struct c4iw_qp *qp = p; - struct debugfs_qp_data *qpd = data; + struct c4iw_debugfs_data *qpd = data; int space; int cc; @@ -101,7 +105,7 @@ static int dump_qps(int id, void *p, void *data) static int qp_release(struct inode *inode, struct file *file) { - struct debugfs_qp_data *qpd = file->private_data; + struct c4iw_debugfs_data *qpd = file->private_data; if (!qpd) { printk(KERN_INFO "%s null qpd?\n", __func__); return 0; @@ -113,7 +117,7 @@ static int qp_release(struct inode *inode, struct file *file) static int qp_open(struct inode *inode, struct file *file) { - struct debugfs_qp_data *qpd; + struct c4iw_debugfs_data *qpd; int ret = 0; int count = 1; @@ -126,7 +130,7 @@ static int qp_open(struct inode *inode, struct file *file) qpd->pos = 0; spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, count_qps, &count); + idr_for_each(&qpd->devp->qpidr, count_idrs, &count); spin_unlock_irq(&qpd->devp->lock); qpd->bufsize = count * 128; @@ -137,7 +141,7 @@ static int qp_open(struct inode *inode, struct file *file) } spin_lock_irq(&qpd->devp->lock); - idr_for_each(&qpd->devp->qpidr, dump_qps, qpd); + idr_for_each(&qpd->devp->qpidr, dump_qp, qpd); spin_unlock_irq(&qpd->devp->lock); qpd->buf[qpd->pos++] = 0; @@ -149,43 +153,86 @@ out: return ret; } -static ssize_t qp_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static const struct file_operations qp_debugfs_fops = { + .owner = THIS_MODULE, + .open = qp_open, + .release = qp_release, + .read = debugfs_read, + .llseek = default_llseek, +}; + +static int dump_stag(int id, void *p, void *data) { - struct debugfs_qp_data *qpd = file->private_data; - loff_t pos = *ppos; - loff_t avail = qpd->pos; + struct c4iw_debugfs_data *stagd = data; + int space; + int cc; - if (pos < 0) - return -EINVAL; - if (pos >= avail) + space = stagd->bufsize - stagd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8); + if (cc < space) + stagd->pos += cc; + return 0; +} + +static int stag_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd = file->private_data; + if (!stagd) { + printk(KERN_INFO "%s null stagd?\n", __func__); return 0; - if (count > avail - pos) - count = avail - pos; + } + kfree(stagd->buf); + kfree(stagd); + return 0; +} - while (count) { - size_t len = 0; +static int stag_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd; + int ret = 0; + int count = 1; - len = min((int)count, (int)qpd->pos - (int)pos); - if (copy_to_user(buf, qpd->buf + pos, len)) - return -EFAULT; - if (len == 0) - return -EINVAL; + stagd = kmalloc(sizeof *stagd, GFP_KERNEL); + if (!stagd) { + ret = -ENOMEM; + goto out; + } + stagd->devp = inode->i_private; + stagd->pos = 0; + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, count_idrs, &count); + spin_unlock_irq(&stagd->devp->lock); - buf += len; - pos += len; - count -= len; + stagd->bufsize = count * sizeof("0x12345678\n"); + stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL); + if (!stagd->buf) { + ret = -ENOMEM; + goto err1; } - count = pos - *ppos; - *ppos = pos; - return count; + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, dump_stag, stagd); + spin_unlock_irq(&stagd->devp->lock); + + stagd->buf[stagd->pos++] = 0; + file->private_data = stagd; + goto out; +err1: + kfree(stagd); +out: + return ret; } -static const struct file_operations qp_debugfs_fops = { +static const struct file_operations stag_debugfs_fops = { .owner = THIS_MODULE, - .open = qp_open, - .release = qp_release, - .read = qp_read, + .open = stag_open, + .release = stag_release, + .read = debugfs_read, + .llseek = default_llseek, }; static int setup_debugfs(struct c4iw_dev *devp) @@ -199,6 +246,11 @@ static int setup_debugfs(struct c4iw_dev *devp) (void *)devp, &qp_debugfs_fops); if (de && de->d_inode) de->d_inode->i_size = 4096; + + de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root, + (void *)devp, &stag_debugfs_fops); + if (de && de->d_inode) + de->d_inode->i_size = 4096; return 0; } @@ -290,7 +342,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) printk(KERN_ERR MOD "error %d initializing rqt pool\n", err); goto err3; } + err = c4iw_ocqp_pool_create(rdev); + if (err) { + printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); + goto err4; + } return 0; +err4: + c4iw_rqtpool_destroy(rdev); err3: c4iw_pblpool_destroy(rdev); err2: @@ -317,6 +376,7 @@ static void c4iw_remove(struct c4iw_dev *dev) idr_destroy(&dev->cqidr); idr_destroy(&dev->qpidr); idr_destroy(&dev->mmidr); + iounmap(dev->rdev.oc_mw_kva); ib_dealloc_device(&dev->ibdev); } @@ -332,6 +392,17 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) } devp->rdev.lldi = *infop; + devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) + + (pci_resource_len(devp->rdev.lldi.pdev, 2) - + roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size)); + devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, + devp->rdev.lldi.vr->ocq.size); + + printk(KERN_INFO MOD "ocq memory: " + "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", + devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size, + devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva); + mutex_lock(&dev_mutex); ret = c4iw_rdev_open(&devp->rdev); @@ -383,46 +454,6 @@ out: return dev; } -static struct sk_buff *t4_pktgl_to_skb(const struct pkt_gl *gl, - unsigned int skb_len, - unsigned int pull_len) -{ - struct sk_buff *skb; - struct skb_shared_info *ssi; - - if (gl->tot_len <= 512) { - skb = alloc_skb(gl->tot_len, GFP_ATOMIC); - if (unlikely(!skb)) - goto out; - __skb_put(skb, gl->tot_len); - skb_copy_to_linear_data(skb, gl->va, gl->tot_len); - } else { - skb = alloc_skb(skb_len, GFP_ATOMIC); - if (unlikely(!skb)) - goto out; - __skb_put(skb, pull_len); - skb_copy_to_linear_data(skb, gl->va, pull_len); - - ssi = skb_shinfo(skb); - ssi->frags[0].page = gl->frags[0].page; - ssi->frags[0].page_offset = gl->frags[0].page_offset + pull_len; - ssi->frags[0].size = gl->frags[0].size - pull_len; - if (gl->nfrags > 1) - memcpy(&ssi->frags[1], &gl->frags[1], - (gl->nfrags - 1) * sizeof(skb_frag_t)); - ssi->nr_frags = gl->nfrags; - - skb->len = gl->tot_len; - skb->data_len = skb->len - pull_len; - skb->truesize += skb->data_len; - - /* Get a reference for the last page, we don't own it */ - get_page(gl->frags[gl->nfrags - 1].page); - } -out: - return skb; -} - static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, const struct pkt_gl *gl) { @@ -447,7 +478,7 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, c4iw_ev_handler(dev, qid); return 0; } else { - skb = t4_pktgl_to_skb(gl, 128, 128); + skb = cxgb4_pktgl_to_skb(gl, 128, 128); if (unlikely(!skb)) goto nomem; } diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 491e76a0327..c13041a0aeb 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -60,7 +60,7 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if (qhp->attr.state == C4IW_QP_STATE_RTS) { attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(qhp->rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, - &attrs, 1); + &attrs, 0); } event.event = ib_event; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index ed459b8f800..16032cdb433 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -46,6 +46,7 @@ #include <linux/timer.h> #include <linux/io.h> #include <linux/kfifo.h> +#include <linux/mutex.h> #include <asm/byteorder.h> @@ -79,21 +80,6 @@ static inline void *cplhdr(struct sk_buff *skb) return skb->data; } -#define C4IW_WR_TO (10*HZ) - -struct c4iw_wr_wait { - wait_queue_head_t wait; - int done; - int ret; -}; - -static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) -{ - wr_waitp->ret = 0; - wr_waitp->done = 0; - init_waitqueue_head(&wr_waitp->wait); -} - struct c4iw_resource { struct kfifo tpt_fifo; spinlock_t tpt_fifo_lock; @@ -127,8 +113,11 @@ struct c4iw_rdev { struct c4iw_dev_ucontext uctx; struct gen_pool *pbl_pool; struct gen_pool *rqt_pool; + struct gen_pool *ocqp_pool; u32 flags; struct cxgb4_lld_info lldi; + unsigned long oc_mw_pa; + void __iomem *oc_mw_kva; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -141,6 +130,44 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5)); } +#define C4IW_WR_TO (10*HZ) + +struct c4iw_wr_wait { + wait_queue_head_t wait; + int done; + int ret; +}; + +static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + wr_waitp->ret = 0; + wr_waitp->done = 0; + init_waitqueue_head(&wr_waitp->wait); +} + +static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, + struct c4iw_wr_wait *wr_waitp, + u32 hwtid, u32 qpid, + const char *func) +{ + unsigned to = C4IW_WR_TO; + do { + + wait_event_timeout(wr_waitp->wait, wr_waitp->done, to); + if (!wr_waitp->done) { + printk(KERN_ERR MOD "%s - Device %s not responding - " + "tid %u qpid %u\n", func, + pci_name(rdev->lldi.pdev), hwtid, qpid); + to = to << 2; + } + } while (!wr_waitp->done); + if (wr_waitp->ret) + printk(KERN_WARNING MOD "%s: FW reply %d tid %u qpid %u\n", + pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid); + return wr_waitp->ret; +} + + struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; @@ -327,6 +354,7 @@ struct c4iw_qp { struct c4iw_qp_attributes attr; struct t4_wq wq; spinlock_t lock; + struct mutex mutex; atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; @@ -579,12 +607,10 @@ struct c4iw_ep_common { struct c4iw_dev *dev; enum c4iw_ep_state state; struct kref kref; - spinlock_t lock; + struct mutex mutex; struct sockaddr_in local_addr; struct sockaddr_in remote_addr; - wait_queue_head_t waitq; - int rpl_done; - int rpl_err; + struct c4iw_wr_wait wr_wait; unsigned long flags; }; @@ -654,8 +680,10 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); int c4iw_rqtpool_create(struct c4iw_rdev *rdev); +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev); void c4iw_pblpool_destroy(struct c4iw_rdev *rdev); void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev); +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev); void c4iw_destroy_resource(struct c4iw_resource *rscp); int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_register_device(struct c4iw_dev *dev); @@ -721,6 +749,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); void c4iw_flush_hw_cq(struct t4_cq *cq); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 269373a62f2..273ffe49525 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -71,7 +71,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | FW_WR_COMPL(1)); - req->wr.wr_lo = (__force __be64)&wr_wait; + req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -103,14 +103,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, len -= C4IW_MAX_INLINE_SIZE; } - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); return ret; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8f645c83a12..f66dd8bf512 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -54,9 +54,9 @@ #include "iw_cxgb4.h" -static int fastreg_support; +static int fastreg_support = 1; module_param(fastreg_support, int, 0644); -MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=0)"); +MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); static int c4iw_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, @@ -149,19 +149,28 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) addr = mm->addr; kfree(mm); - if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && - (addr < (pci_resource_start(rdev->lldi.pdev, 2) + - pci_resource_len(rdev->lldi.pdev, 2)))) { + if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 0) + + pci_resource_len(rdev->lldi.pdev, 0)))) { /* - * Map T4 DB register. + * MA_SYNC register... */ - if (vma->vm_flags & VM_READ) - return -EPERM; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 2) + + pci_resource_len(rdev->lldi.pdev, 2)))) { + + /* + * Map user DB or OCQP memory... + */ + if (addr >= rdev->oc_mw_pa) + vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, len, vma->vm_page_prot); @@ -382,7 +391,17 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, static int c4iw_get_mib(struct ib_device *ibdev, union rdma_protocol_stats *stats) { - return -ENOSYS; + struct tp_tcp_stats v4, v6; + struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); + + cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); + memset(stats, 0, sizeof *stats); + stats->iw.tcpInSegs = v4.tcpInSegs + v6.tcpInSegs; + stats->iw.tcpOutSegs = v4.tcpOutSegs + v6.tcpOutSegs; + stats->iw.tcpRetransSegs = v4.tcpRetransSegs + v6.tcpRetransSegs; + stats->iw.tcpOutRsts = v4.tcpOutRsts + v6.tcpOutSegs; + + return 0; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); @@ -472,6 +491,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; dev->ibdev.get_protocol_stats = c4iw_get_mib; + dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 93f6e5bf0ec..057cb2505ea 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -31,6 +31,63 @@ */ #include "iw_cxgb4.h" +static int ocqp_support; +module_param(ocqp_support, int, 0644); +MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=0)"); + +static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) +{ + unsigned long flag; + spin_lock_irqsave(&qhp->lock, flag); + qhp->attr.state = state; + spin_unlock_irqrestore(&qhp->lock, flag); +} + +static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize); +} + +static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue, + pci_unmap_addr(sq, mapping)); +} + +static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (t4_sq_onchip(sq)) + dealloc_oc_sq(rdev, sq); + else + dealloc_host_sq(rdev, sq); +} + +static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + if (!ocqp_support || !t4_ocqp_supported()) + return -ENOSYS; + sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize); + if (!sq->dma_addr) + return -ENOMEM; + sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr - + rdev->lldi.vr->ocq.start; + sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr - + rdev->lldi.vr->ocq.start); + sq->flags |= T4_SQ_ONCHIP; + return 0; +} + +static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) +{ + sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize, + &(sq->dma_addr), GFP_KERNEL); + if (!sq->queue) + return -ENOMEM; + sq->phys_addr = virt_to_phys(sq->queue); + pci_unmap_addr_set(sq, mapping, sq->dma_addr); + return 0; +} + static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct c4iw_dev_ucontext *uctx) { @@ -41,9 +98,7 @@ static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, dma_free_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, wq->rq.queue, dma_unmap_addr(&wq->rq, mapping)); - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->sq.memsize, wq->sq.queue, - dma_unmap_addr(&wq->sq, mapping)); + dealloc_sq(rdev, &wq->sq); c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); @@ -93,11 +148,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (!wq->rq.rqt_hwaddr) goto err4; - wq->sq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), - wq->sq.memsize, &(wq->sq.dma_addr), - GFP_KERNEL); - if (!wq->sq.queue) - goto err5; + if (user) { + if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq)) + goto err5; + } else + if (alloc_host_sq(rdev, &wq->sq)) + goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); @@ -144,7 +200,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, V_FW_RI_RES_WR_NRES(2) | FW_WR_COMPL(1)); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (u64)&wr_wait; + res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; @@ -158,6 +214,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ + t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0 | V_FW_RI_RES_WR_IQID(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | @@ -198,14 +255,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, ret = c4iw_ofld_send(rdev, skb); if (ret) goto err7; - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rdev->lldi.pdev)); - rdev->flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); if (ret) goto err7; @@ -219,9 +269,7 @@ err7: wq->rq.memsize, wq->rq.queue, dma_unmap_addr(&wq->rq, mapping)); err6: - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->sq.memsize, wq->sq.queue, - dma_unmap_addr(&wq->sq, mapping)); + dealloc_sq(rdev, &wq->sq); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: @@ -263,6 +311,9 @@ static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, rem -= len; } } + len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp); + if (len) + memset(dstp, 0, len); immdp->op = FW_RI_DATA_IMMD; immdp->r1 = 0; immdp->r2 = 0; @@ -292,6 +343,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, if (++flitp == queue_end) flitp = queue_start; } + *flitp = (__force __be64)0; isglp->op = FW_RI_DATA_ISGL; isglp->r1 = 0; isglp->nsge = cpu_to_be16(num_sge); @@ -453,13 +505,15 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } -static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) { struct fw_ri_immd *imdp; __be64 *p; int i; int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int rem; if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) return -EINVAL; @@ -474,32 +528,28 @@ static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); - if (pbllen > T4_MAX_FR_IMMD) { - struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); - struct fw_ri_dsgl *sglp; - - sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); - sglp->op = FW_RI_DATA_DSGL; - sglp->r1 = 0; - sglp->nsge = cpu_to_be16(1); - sglp->addr0 = cpu_to_be64(c4pl->dma_addr); - sglp->len0 = cpu_to_be32(pbllen); - - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *sglp, 16); - } else { - imdp = (struct fw_ri_immd *)(&wqe->fr + 1); - imdp->op = FW_RI_DATA_IMMD; - imdp->r1 = 0; - imdp->r2 = 0; - imdp->immdlen = cpu_to_be32(pbllen); - p = (__be64 *)(imdp + 1); - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) - *p = cpu_to_be64( - (u64)wr->wr.fast_reg.page_list->page_list[i]); - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, - 16); + WARN_ON(pbllen > T4_MAX_FR_IMMD); + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); return 0; } @@ -587,7 +637,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) - fw_flags |= FW_RI_RDMA_READ_INVALIDATE; + fw_flags = FW_RI_RDMA_READ_INVALIDATE; else fw_flags = 0; err = build_rdma_read(wqe, wr, &len16); @@ -600,7 +650,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(wqe, wr, &len16); + err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -905,46 +955,38 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, * Assumes qhp lock is held. */ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, - struct c4iw_cq *schp, unsigned long *flag) + struct c4iw_cq *schp) { int count; int flushed; + unsigned long flag; PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - /* take a ref on the qhp since we must release the lock */ - atomic_inc(&qhp->refcnt); - spin_unlock_irqrestore(&qhp->lock, *flag); /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&rchp->lock, *flag); + spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&rchp->cq); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, *flag); + spin_unlock_irqrestore(&rchp->lock, flag); if (flushed) (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&schp->lock, *flag); + spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&schp->cq); c4iw_count_scqes(&schp->cq, &qhp->wq, &count); flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&schp->lock, *flag); + spin_unlock_irqrestore(&schp->lock, flag); if (flushed) (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - - /* deref */ - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); - - spin_lock_irqsave(&qhp->lock, *flag); } -static void flush_qp(struct c4iw_qp *qhp, unsigned long *flag) +static void flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; @@ -958,7 +1000,7 @@ static void flush_qp(struct c4iw_qp *qhp, unsigned long *flag) t4_set_cq_in_error(&schp->cq); return; } - __flush_qp(qhp, rchp, schp, flag); + __flush_qp(qhp, rchp, schp); } static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, @@ -966,7 +1008,6 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, { struct fw_ri_wr *wqe; int ret; - struct c4iw_wr_wait wr_wait; struct sk_buff *skb; PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, @@ -985,28 +1026,16 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, wqe->flowid_len16 = cpu_to_be32( FW_WR_FLOWID(ep->hwtid) | FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); - wqe->cookie = (u64)&wr_wait; + wqe->cookie = (unsigned long) &ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(&ep->com.wr_wait); ret = c4iw_ofld_send(&rhp->rdev, skb); if (ret) goto out; - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rhp->rdev.lldi.pdev)); - rhp->rdev.flags = T4_FATAL_ERROR; - ret = -EIO; - } else { - ret = wr_wait.ret; - if (ret) - printk(KERN_WARNING MOD - "%s: Abnormal close qpid %d ret %u\n", - pci_name(rhp->rdev.lldi.pdev), qhp->wq.sq.qid, - ret); - } + ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid, + qhp->wq.sq.qid, __func__); out: PDBG("%s ret %d\n", __func__, ret); return ret; @@ -1040,7 +1069,6 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) { struct fw_ri_wr *wqe; int ret; - struct c4iw_wr_wait wr_wait; struct sk_buff *skb; PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, @@ -1060,7 +1088,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) FW_WR_FLOWID(qhp->ep->hwtid) | FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); - wqe->cookie = (u64)&wr_wait; + wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = @@ -1097,19 +1125,13 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); - c4iw_init_wr_wait(&wr_wait); + c4iw_init_wr_wait(&qhp->ep->com.wr_wait); ret = c4iw_ofld_send(&rhp->rdev, skb); if (ret) goto out; - wait_event_timeout(wr_wait.wait, wr_wait.done, C4IW_WR_TO); - if (!wr_wait.done) { - printk(KERN_ERR MOD "Device %s not responding!\n", - pci_name(rhp->rdev.lldi.pdev)); - rhp->rdev.flags = T4_FATAL_ERROR; - ret = -EIO; - } else - ret = wr_wait.ret; + ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); out: PDBG("%s ret %d\n", __func__, ret); return ret; @@ -1122,7 +1144,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, { int ret = 0; struct c4iw_qp_attributes newattr = qhp->attr; - unsigned long flag; int disconnect = 0; int terminate = 0; int abort = 0; @@ -1133,7 +1154,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state, (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); - spin_lock_irqsave(&qhp->lock, flag); + mutex_lock(&qhp->mutex); /* Process attr changes if in IDLE */ if (mask & C4IW_QP_ATTR_VALID_MODIFY) { @@ -1184,7 +1205,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.mpa_attr = attrs->mpa_attr; qhp->attr.llp_stream_handle = attrs->llp_stream_handle; qhp->ep = qhp->attr.llp_stream_handle; - qhp->attr.state = C4IW_QP_STATE_RTS; + set_state(qhp, C4IW_QP_STATE_RTS); /* * Ref the endpoint here and deref when we @@ -1193,15 +1214,13 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, * transition. */ c4iw_get_ep(&qhp->ep->com); - spin_unlock_irqrestore(&qhp->lock, flag); ret = rdma_init(rhp, qhp); - spin_lock_irqsave(&qhp->lock, flag); if (ret) goto err; break; case C4IW_QP_STATE_ERROR: - qhp->attr.state = C4IW_QP_STATE_ERROR; - flush_qp(qhp, &flag); + set_state(qhp, C4IW_QP_STATE_ERROR); + flush_qp(qhp); break; default: ret = -EINVAL; @@ -1212,38 +1231,38 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); - qhp->attr.state = C4IW_QP_STATE_CLOSING; + set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { abort = 0; disconnect = 1; - c4iw_get_ep(&ep->com); + c4iw_get_ep(&qhp->ep->com); } - spin_unlock_irqrestore(&qhp->lock, flag); ret = rdma_fini(rhp, qhp, ep); - spin_lock_irqsave(&qhp->lock, flag); if (ret) { - c4iw_get_ep(&ep->com); + if (internal) + c4iw_get_ep(&qhp->ep->com); disconnect = abort = 1; goto err; } break; case C4IW_QP_STATE_TERMINATE: - qhp->attr.state = C4IW_QP_STATE_TERMINATE; + set_state(qhp, C4IW_QP_STATE_TERMINATE); if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; - c4iw_get_ep(&ep->com); - terminate = 1; + if (!internal) + terminate = 1; disconnect = 1; + c4iw_get_ep(&qhp->ep->com); break; case C4IW_QP_STATE_ERROR: - qhp->attr.state = C4IW_QP_STATE_ERROR; + set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { abort = 1; disconnect = 1; ep = qhp->ep; - c4iw_get_ep(&ep->com); + c4iw_get_ep(&qhp->ep->com); } goto err; break; @@ -1259,8 +1278,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } switch (attrs->next_state) { case C4IW_QP_STATE_IDLE: - flush_qp(qhp, &flag); - qhp->attr.state = C4IW_QP_STATE_IDLE; + flush_qp(qhp); + set_state(qhp, C4IW_QP_STATE_IDLE); qhp->attr.llp_stream_handle = NULL; c4iw_put_ep(&qhp->ep->com); qhp->ep = NULL; @@ -1282,7 +1301,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, ret = -EINVAL; goto out; } - qhp->attr.state = C4IW_QP_STATE_IDLE; + set_state(qhp, C4IW_QP_STATE_IDLE); break; case C4IW_QP_STATE_TERMINATE: if (!internal) { @@ -1305,15 +1324,16 @@ err: /* disassociate the LLP connection */ qhp->attr.llp_stream_handle = NULL; - ep = qhp->ep; + if (!ep) + ep = qhp->ep; qhp->ep = NULL; - qhp->attr.state = C4IW_QP_STATE_ERROR; + set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; wake_up(&qhp->wait); BUG_ON(!ep); - flush_qp(qhp, &flag); + flush_qp(qhp); out: - spin_unlock_irqrestore(&qhp->lock, flag); + mutex_unlock(&qhp->mutex); if (terminate) post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL); @@ -1335,7 +1355,6 @@ out: */ if (free) c4iw_put_ep(&ep->com); - PDBG("%s exit state %d\n", __func__, qhp->attr.state); return ret; } @@ -1380,7 +1399,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; - struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4; + struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; PDBG("%s ib_pd %p\n", __func__, pd); @@ -1450,6 +1469,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; spin_lock_init(&qhp->lock); + mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); @@ -1478,7 +1498,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = -ENOMEM; goto err6; } - + if (t4_sq_onchip(&qhp->wq.sq)) { + mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); + if (!mm5) { + ret = -ENOMEM; + goto err7; + } + uresp.flags = C4IW_QPF_ONCHIP; + } else + uresp.flags = 0; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; @@ -1487,6 +1515,10 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); + if (mm5) { + uresp.ma_sync_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.rq_key = ucontext->key; @@ -1498,9 +1530,9 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) - goto err7; + goto err8; mm1->key = uresp.sq_key; - mm1->addr = virt_to_phys(qhp->wq.sq.queue); + mm1->addr = qhp->wq.sq.phys_addr; mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); insert_mmap(ucontext, mm1); mm2->key = uresp.rq_key; @@ -1515,6 +1547,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm4->addr = qhp->wq.rq.udb; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); + if (mm5) { + mm5->key = uresp.ma_sync_key; + mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + A_PCIE_MA_SYNC) & PAGE_MASK; + mm5->len = PAGE_SIZE; + insert_mmap(ucontext, mm5); + } } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); @@ -1522,6 +1561,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.sq.qid); return &qhp->ibqp; +err8: + kfree(mm5); err7: kfree(mm4); err6: diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 83b23dfa250..4fb50d58b49 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -311,6 +311,9 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + if (!addr && printk_ratelimit()) + printk(KERN_WARNING MOD "%s: Out of PBL memory\n", + pci_name(rdev->lldi.pdev)); return (u32)addr; } @@ -370,6 +373,9 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) { unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); + if (!addr && printk_ratelimit()) + printk(KERN_WARNING MOD "%s: Out of RQT memory\n", + pci_name(rdev->lldi.pdev)); return (u32)addr; } @@ -416,3 +422,59 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) { gen_pool_destroy(rdev->rqt_pool); } + +/* + * On-Chip QP Memory. + */ +#define MIN_OCQP_SHIFT 12 /* 4KB == min ocqp size */ + +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + return (u32)addr; +} + +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); +} + +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev) +{ + unsigned start, chunk, top; + + rdev->ocqp_pool = gen_pool_create(MIN_OCQP_SHIFT, -1); + if (!rdev->ocqp_pool) + return -ENOMEM; + + start = rdev->lldi.vr->ocq.start; + chunk = rdev->lldi.vr->ocq.size; + top = start + chunk; + + while (start < top) { + chunk = min(top - start + 1, chunk); + if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) { + PDBG("%s failed to add OCQP chunk (%x/%x)\n", + __func__, start, chunk); + if (chunk <= 1024 << MIN_OCQP_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all OCQP chunks (%x/%x)\n", + start, top - start); + return 0; + } + chunk >>= 1; + } else { + PDBG("%s added OCQP chunk (%x/%x)\n", + __func__, start, chunk); + start += chunk; + } + } + return 0; +} + +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->ocqp_pool); +} diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 24f369046ef..70004425d69 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -52,6 +52,7 @@ #define T4_STAG_UNSET 0xffffffff #define T4_FW_MAJ 0 #define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) +#define A_PCIE_MA_SYNC 0x30b4 struct t4_status_page { __be32 rsvd1; /* flit 0 - hw owns */ @@ -65,7 +66,7 @@ struct t4_status_page { #define T4_EQ_ENTRY_SIZE 64 -#define T4_SQ_NUM_SLOTS 4 +#define T4_SQ_NUM_SLOTS 5 #define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS) #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) @@ -78,7 +79,7 @@ struct t4_status_page { sizeof(struct fw_ri_rdma_write_wr) - \ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ - sizeof(struct fw_ri_immd))) + sizeof(struct fw_ri_immd)) & ~31UL) #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) #define T4_RQ_NUM_SLOTS 2 @@ -266,10 +267,36 @@ struct t4_swsqe { u16 idx; }; +static inline pgprot_t t4_pgprot_wc(pgprot_t prot) +{ +#if defined(__i386__) || defined(__x86_64__) + return pgprot_writecombine(prot); +#elif defined(CONFIG_PPC64) + return __pgprot((pgprot_val(prot) | _PAGE_NO_CACHE) & + ~(pgprot_t)_PAGE_GUARDED); +#else + return pgprot_noncached(prot); +#endif +} + +static inline int t4_ocqp_supported(void) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return 1; +#else + return 0; +#endif +} + +enum { + T4_SQ_ONCHIP = (1<<0), +}; + struct t4_sq { union t4_wr *queue; dma_addr_t dma_addr; DEFINE_DMA_UNMAP_ADDR(mapping); + unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; u64 udb; @@ -280,6 +307,7 @@ struct t4_sq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 flags; }; struct t4_swrqe { @@ -350,6 +378,11 @@ static inline void t4_rq_consume(struct t4_wq *wq) wq->rq.cidx = 0; } +static inline int t4_sq_onchip(struct t4_sq *sq) +{ + return sq->flags & T4_SQ_ONCHIP; +} + static inline int t4_sq_empty(struct t4_wq *wq) { return wq->sq.in_use == 0; @@ -396,30 +429,27 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) static inline int t4_wq_in_error(struct t4_wq *wq) { - return wq->sq.queue[wq->sq.size].status.qp_err; + return wq->rq.queue[wq->rq.size].status.qp_err; } static inline void t4_set_wq_in_error(struct t4_wq *wq) { - wq->sq.queue[wq->sq.size].status.qp_err = 1; wq->rq.queue[wq->rq.size].status.qp_err = 1; } static inline void t4_disable_wq_db(struct t4_wq *wq) { - wq->sq.queue[wq->sq.size].status.db_off = 1; wq->rq.queue[wq->rq.size].status.db_off = 1; } static inline void t4_enable_wq_db(struct t4_wq *wq) { - wq->sq.queue[wq->sq.size].status.db_off = 0; wq->rq.queue[wq->rq.size].status.db_off = 0; } static inline int t4_wq_db_enabled(struct t4_wq *wq) { - return !wq->sq.queue[wq->sq.size].status.db_off; + return !wq->rq.queue[wq->rq.size].status.db_off; } struct t4_cq { diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index ed6414abde0..e6669d54770 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -50,7 +50,13 @@ struct c4iw_create_cq_resp { __u32 qid_mask; }; + +enum { + C4IW_QPF_ONCHIP = (1<<0) +}; + struct c4iw_create_qp_resp { + __u64 ma_sync_key; __u64 sq_key; __u64 rq_key; __u64 sq_db_gts_key; @@ -62,5 +68,6 @@ struct c4iw_create_qp_resp { __u32 sq_size; __u32 rq_size; __u32 qid_mask; + __u32 flags; }; #endif diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 53f4cd4fc19..43cae84005f 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -171,7 +171,7 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) } ret = ehca_reg_maxmr(shca, e_maxmr, - (void *)ehca_map_vaddr((void *)KERNELBASE), + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); @@ -1636,7 +1636,7 @@ int ehca_reg_internal_maxmr( /* register internal max-MR on HCA */ size_maxmr = ehca_mr_len; - iova_start = (u64 *)ehca_map_vaddr((void *)KERNELBASE); + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, @@ -2209,7 +2209,7 @@ int ehca_mr_is_maxmr(u64 size, { /* a MR is treated as max-MR only if it fits following: */ if ((size == ehca_mr_len) && - (iova_start == (void *)ehca_map_vaddr((void *)KERNELBASE))) { + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { ehca_gen_dbg("this is a max-MR"); return 1; } else diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index fa3df82681d..4496f2820c9 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \ +ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \ -DIPATH_KERN_TYPE=0 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index d13e72685dc..8c8afc716b9 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -57,6 +57,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; @@ -361,13 +362,13 @@ bail: return ret; } -static int ipathfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ipathfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - ipathfs_fill_super, mnt); - if (ret >= 0) - ipath_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, ipathfs_fill_super); + if (!IS_ERR(ret)) + ipath_super = ret->d_sb; return ret; } @@ -410,7 +411,7 @@ bail: static struct file_system_type ipathfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = ipathfs_get_sb, + .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 11a236f8d88..4b8f9c49397 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -30,66 +30,163 @@ * SOFTWARE. */ +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> + #include <linux/slab.h> +#include <linux/inet.h> +#include <linux/string.h> #include "mlx4_ib.h" -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port) { - struct mlx4_dev *dev = to_mdev(pd->device)->dev; - struct mlx4_ib_ah *ah; + struct in6_addr in6; - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); + *is_mcast = 0; - memset(&ah->av, 0, sizeof ah->av); + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6); + if (rdma_link_local_addr(&in6)) + rdma_get_ll_mac(&in6, mac); + else if (rdma_is_multicast_addr(&in6)) { + rdma_get_mcast_mac(&in6, mac); + *is_mcast = 1; + } else + return -EINVAL; - ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.g_slid = ah_attr->src_path_bits; - ah->av.dlid = cpu_to_be16(ah_attr->dlid); - if (ah_attr->static_rate) { - ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; - while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && - !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) - --ah->av.stat_rate; - } - ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + return 0; +} + +static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_dev *dev = to_mdev(pd->device)->dev; + + ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.ib.g_slid = ah_attr->src_path_bits; if (ah_attr->ah_flags & IB_AH_GRH) { - ah->av.g_slid |= 0x80; - ah->av.gid_index = ah_attr->grh.sgid_index; - ah->av.hop_limit = ah_attr->grh.hop_limit; - ah->av.sl_tclass_flowlabel |= + ah->av.ib.g_slid |= 0x80; + ah->av.ib.gid_index = ah_attr->grh.sgid_index; + ah->av.ib.hop_limit = ah_attr->grh.hop_limit; + ah->av.ib.sl_tclass_flowlabel |= cpu_to_be32((ah_attr->grh.traffic_class << 20) | ah_attr->grh.flow_label); - memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); + } + + ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) + --ah->av.ib.stat_rate; } + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } +static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_dev *dev = ibdev->dev; + union ib_gid sgid; + u8 mac[6]; + int err; + int is_mcast; + u16 vlan_tag; + + err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); + if (err) + return ERR_PTR(err); + + memcpy(ah->av.eth.mac, mac, 6); + err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); + if (err) + return ERR_PTR(err); + vlan_tag = rdma_get_vlan_id(&sgid); + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & 7) << 13; + ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + ah->av.eth.vlan = cpu_to_be16(vlan_tag); + if (ah_attr->static_rate) { + ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) + --ah->av.eth.stat_rate; + } + + /* + * HW requires multicast LID so we just choose one. + */ + if (is_mcast) + ah->av.ib.dlid = cpu_to_be16(0xc000); + + memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); + ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + + return &ah->ibah; +} + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah; + struct ib_ah *ret; + + ah = kzalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { + if (!(ah_attr->ah_flags & IB_AH_GRH)) { + ret = ERR_PTR(-EINVAL); + } else { + /* + * TBD: need to handle the case when we get + * called in an atomic context and there we + * might sleep. We don't expect this + * currently since we're working with link + * local addresses which we can translate + * without going to sleep. + */ + ret = create_iboe_ah(pd, ah_attr, ah); + } + + if (IS_ERR(ret)) + kfree(ah); + + return ret; + } else + return create_ib_ah(pd, ah_attr, ah); /* never fails */ +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); + enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(ah->av.dlid); - ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24; - if (ah->av.stat_rate) - ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; - ah_attr->src_path_bits = ah->av.g_slid & 0x7F; + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; + ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; + if (ah->av.ib.stat_rate) + ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; + ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F; if (mlx4_ib_ah_grh_present(ah)) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.traffic_class = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20; + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20; ah_attr->grh.flow_label = - be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; - ah_attr->grh.hop_limit = ah->av.hop_limit; - ah_attr->grh.sgid_index = ah->av.gid_index; - memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff; + ah_attr->grh.hop_limit = ah->av.ib.hop_limit; + ah_attr->grh.sgid_index = ah->av.ib.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16); } return 0; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f38d5b11892..c9a8dd63b9e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -311,19 +311,25 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) struct ib_mad_agent *agent; int p, q; int ret; + enum rdma_link_layer ll; - for (p = 0; p < dev->num_ports; ++p) + for (p = 0; p < dev->num_ports; ++p) { + ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1); for (q = 0; q <= 1; ++q) { - agent = ib_register_mad_agent(&dev->ib_dev, p + 1, - q ? IB_QPT_GSI : IB_QPT_SMI, - NULL, 0, send_handler, - NULL, NULL); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - goto err; - } - dev->send_agent[p][q] = agent; + if (ll == IB_LINK_LAYER_INFINIBAND) { + agent = ib_register_mad_agent(&dev->ib_dev, p + 1, + q ? IB_QPT_GSI : IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + dev->send_agent[p][q] = agent; + } else + dev->send_agent[p][q] = NULL; } + } return 0; @@ -344,8 +350,10 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) for (p = 0; p < dev->num_ports; ++p) { for (q = 0; q <= 1; ++q) { agent = dev->send_agent[p][q]; - dev->send_agent[p][q] = NULL; - ib_unregister_mad_agent(agent); + if (agent) { + dev->send_agent[p][q] = NULL; + ib_unregister_mad_agent(agent); + } } if (dev->sm_ah[p]) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4e94e360e43..bf3e20cd029 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -35,9 +35,14 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/rtnetlink.h> +#include <linux/if_vlan.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/cmd.h> @@ -58,6 +63,15 @@ static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; +struct update_gid_work { + struct work_struct work; + union ib_gid gids[128]; + struct mlx4_ib_dev *dev; + int port; +}; + +static struct workqueue_struct *wq; + static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -66,6 +80,8 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } +static union ib_gid zgid; + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -135,7 +151,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; - props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64); + props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; @@ -154,28 +170,19 @@ out: return err; } -static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) +static enum rdma_link_layer +mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; - - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - memset(props, 0, sizeof *props); - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + struct mlx4_dev *dev = to_mdev(device)->dev; - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; + return dev->caps.port_mask & (1 << (port_num - 1)) ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; +} +static int ib_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); @@ -196,6 +203,80 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + return 0; +} + +static u8 state_to_phys_state(enum ib_port_state state) +{ + return state == IB_PORT_ACTIVE ? 5 : 3; +} + +static int eth_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ + struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe; + struct net_device *ndev; + enum ib_mtu tmp; + + props->active_width = IB_WIDTH_4X; + props->active_speed = 4; + props->port_cap_flags = IB_PORT_CM_SUP; + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->pkey_tbl_len = 1; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->max_mtu = IB_MTU_2048; + props->subnet_timeout = 0; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = 0; + props->state = IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + props->active_mtu = IB_MTU_256; + spin_lock(&iboe->lock); + ndev = iboe->netdevs[port - 1]; + if (!ndev) + goto out; + + tmp = iboe_get_mtu(ndev->mtu); + props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; + + props->state = netif_running(ndev) && netif_oper_up(ndev) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + +out: + spin_unlock(&iboe->lock); + return 0; +} + +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof *props); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? + ib_link_query_port(ibdev, port, props, out_mad) : + eth_link_query_port(ibdev, port, props, out_mad); + out: kfree(in_mad); kfree(out_mad); @@ -203,8 +284,8 @@ out: return err; } -static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) +static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; @@ -241,6 +322,25 @@ out: return err; } +static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + + *gid = dev->iboe.gid_table[port - 1][index]; + + return 0; +} + +static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + return __mlx4_ib_query_gid(ibdev, port, index, gid); + else + return iboe_query_gid(ibdev, port, index, gid); +} + static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { @@ -272,14 +372,32 @@ out: static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { + struct mlx4_cmd_mailbox *mailbox; + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; - if (mask & IB_DEVICE_MODIFY_NODE_DESC) { - spin_lock(&to_mdev(ibdev)->sm_lock); - memcpy(ibdev->node_desc, props->node_desc, 64); - spin_unlock(&to_mdev(ibdev)->sm_lock); - } + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + spin_lock(&to_mdev(ibdev)->sm_lock); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock(&to_mdev(ibdev)->sm_lock); + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); + if (IS_ERR(mailbox)) + return 0; + + memset(mailbox->buf, 0, 256); + memcpy(mailbox->buf, props->node_desc, 64); + mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A); + + mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } @@ -289,6 +407,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, { struct mlx4_cmd_mailbox *mailbox; int err; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) @@ -304,7 +423,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } - err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); mlx4_free_cmd_mailbox(dev->dev, mailbox); @@ -447,18 +566,132 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) +{ + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_gid_entry *ge; + + ge = kzalloc(sizeof *ge, GFP_KERNEL); + if (!ge) + return -ENOMEM; + + ge->gid = *gid; + if (mlx4_ib_add_mc(mdev, mqp, gid)) { + ge->port = mqp->port; + ge->added = 1; + } + + mutex_lock(&mqp->mutex); + list_add_tail(&ge->list, &mqp->gid_list); + mutex_unlock(&mqp->mutex); + + return 0; +} + +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid) +{ + u8 mac[6]; + struct net_device *ndev; + int ret = 0; + + if (!mqp->port) + return 0; + + spin_lock(&mdev->iboe.lock); + ndev = mdev->iboe.netdevs[mqp->port - 1]; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + + if (ndev) { + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + rtnl_lock(); + dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac); + ret = 1; + rtnl_unlock(); + dev_put(ndev); + } + + return ret; +} + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw, - !!(to_mqp(ibqp)->flags & - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + if (err) + return err; + + err = add_gid_entry(ibqp, gid); + if (err) + goto err_add; + + return 0; + +err_add: + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw); + return err; +} + +static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) +{ + struct mlx4_ib_gid_entry *ge; + struct mlx4_ib_gid_entry *tmp; + struct mlx4_ib_gid_entry *ret = NULL; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!memcmp(raw, ge->gid.raw, 16)) { + ret = ge; + break; + } + } + + return ret; } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_detach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u8 mac[6]; + struct net_device *ndev; + struct mlx4_ib_gid_entry *ge; + + err = mlx4_multicast_detach(mdev->dev, + &mqp->mqp, gid->raw); + if (err) + return err; + + mutex_lock(&mqp->mutex); + ge = find_gid_entry(mqp, gid->raw); + if (ge) { + spin_lock(&mdev->iboe.lock); + ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + rdma_get_mcast_mac((struct in6_addr *)gid, mac); + if (ndev) { + rtnl_lock(); + dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac); + rtnl_unlock(); + dev_put(ndev); + } + list_del(&ge->list); + kfree(ge); + } else + printk(KERN_WARNING "could not find mgid entry\n"); + + mutex_unlock(&mqp->mutex); + + return 0; } static int init_node_data(struct mlx4_ib_dev *dev) @@ -543,15 +776,215 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + +static void update_gids_task(struct work_struct *work) +{ + struct update_gid_work *gw = container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; + struct mlx4_dev *dev = gw->dev->dev; + struct ib_event event; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox)); + return; + } + + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof gw->gids); + + err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); + if (err) + printk(KERN_WARNING "set port command failed\n"); + else { + memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); + event.device = &gw->dev->ib_dev; + event.element.port_num = gw->port; + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + + mlx4_free_cmd_mailbox(dev, mailbox); + kfree(gw); +} + +static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) +{ + struct net_device *ndev = dev->iboe.netdevs[port - 1]; + struct update_gid_work *work; + struct net_device *tmp; + int i; + u8 *hits; + int ret; + union ib_gid gid; + int free; + int found; + int need_update = 0; + u16 vid; + + work = kzalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; + + hits = kzalloc(128, GFP_ATOMIC); + if (!hits) { + ret = -ENOMEM; + goto out; + } + + read_lock(&dev_base_lock); + for_each_netdev(&init_net, tmp) { + if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { + gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + vid = rdma_vlan_dev_vlan_id(tmp); + mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); + found = 0; + free = -1; + for (i = 0; i < 128; ++i) { + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + free = i; + if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { + hits[i] = 1; + found = 1; + break; + } + } + + if (!found) { + if (tmp == ndev && + (memcmp(&dev->iboe.gid_table[port - 1][0], + &gid, sizeof gid) || + !memcmp(&dev->iboe.gid_table[port - 1][0], + &zgid, sizeof gid))) { + dev->iboe.gid_table[port - 1][0] = gid; + ++need_update; + hits[0] = 1; + } else if (free >= 0) { + dev->iboe.gid_table[port - 1][free] = gid; + hits[free] = 1; + ++need_update; + } + } + } + } + read_unlock(&dev_base_lock); + + for (i = 0; i < 128; ++i) + if (!hits[i]) { + if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) + ++need_update; + dev->iboe.gid_table[port - 1][i] = zgid; + } + + if (need_update) { + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); + } else + kfree(work); + + kfree(hits); + return 0; + +out: + kfree(work); + return ret; +} + +static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) +{ + switch (event) { + case NETDEV_UP: + case NETDEV_CHANGEADDR: + update_ipv6_gids(dev, port, 0); + break; + + case NETDEV_DOWN: + update_ipv6_gids(dev, port, 1); + dev->iboe.netdevs[port - 1] = NULL; + } +} + +static void netdev_added(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 0); +} + +static void netdev_removed(struct mlx4_ib_dev *dev, int port) +{ + update_ipv6_gids(dev, port, 1); +} + +static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct mlx4_ib_dev *ibdev; + struct net_device *oldnd; + struct mlx4_ib_iboe *iboe; + int port; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); + iboe = &ibdev->iboe; + + spin_lock(&iboe->lock); + mlx4_foreach_ib_transport_port(port, ibdev->dev) { + oldnd = iboe->netdevs[port - 1]; + iboe->netdevs[port - 1] = + mlx4_get_protocol_dev(ibdev->dev, MLX4_PROTOCOL_EN, port); + if (oldnd != iboe->netdevs[port - 1]) { + if (iboe->netdevs[port - 1]) + netdev_added(ibdev, port); + else + netdev_removed(ibdev, port); + } + } + + if (dev == iboe->netdevs[0] || + (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) + handle_en_event(ibdev, 1, event); + else if (dev == iboe->netdevs[1] + || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) + handle_en_event(ibdev, 2, event); + + spin_unlock(&iboe->lock); + + return NOTIFY_DONE; +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; int num_ports = 0; int i; + int err; + struct mlx4_ib_iboe *iboe; printk_once(KERN_INFO "%s", mlx4_ib_version); - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + mlx4_foreach_ib_transport_port(i, dev) num_ports++; /* No point in registering a device with no ports... */ @@ -564,6 +997,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) return NULL; } + iboe = &ibdev->iboe; + if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) goto err_dealloc; @@ -612,6 +1047,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; + ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; ibdev->ib_dev.query_gid = mlx4_ib_query_gid; ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; @@ -656,6 +1092,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + spin_lock_init(&iboe->lock); + if (init_node_data(ibdev)) goto err_map; @@ -668,16 +1106,28 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_mad_init(ibdev)) goto err_reg; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { + iboe->nb.notifier_call = mlx4_ib_netdev_event; + err = register_netdevice_notifier(&iboe->nb); + if (err) + goto err_reg; + } + for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) { if (device_create_file(&ibdev->ib_dev.dev, mlx4_class_attributes[i])) - goto err_reg; + goto err_notif; } ibdev->ib_active = true; return ibdev; +err_notif: + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + flush_workqueue(wq); + err_reg: ib_unregister_device(&ibdev->ib_dev); @@ -703,11 +1153,16 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); + if (ibdev->iboe.nb.notifier_call) { + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + printk(KERN_WARNING "failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + iounmap(ibdev->uar_map); - for (p = 1; p <= ibdev->num_ports; ++p) + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); - iounmap(ibdev->uar_map); mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); ib_dealloc_device(&ibdev->ib_dev); @@ -747,19 +1202,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, } static struct mlx4_interface mlx4_ib_interface = { - .add = mlx4_ib_add, - .remove = mlx4_ib_remove, - .event = mlx4_ib_event + .add = mlx4_ib_add, + .remove = mlx4_ib_remove, + .event = mlx4_ib_event, + .protocol = MLX4_PROTOCOL_IB }; static int __init mlx4_ib_init(void) { - return mlx4_register_interface(&mlx4_ib_interface); + int err; + + wq = create_singlethread_workqueue("mlx4_ib"); + if (!wq) + return -ENOMEM; + + err = mlx4_register_interface(&mlx4_ib_interface); + if (err) { + destroy_workqueue(wq); + return err; + } + + return 0; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); + destroy_workqueue(wq); } module_init(mlx4_ib_init); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 3486d7675e5..2a322f21049 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -112,6 +112,13 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, }; +struct mlx4_ib_gid_entry { + struct list_head list; + union ib_gid gid; + int added; + u8 port; +}; + struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; @@ -138,6 +145,8 @@ struct mlx4_ib_qp { u8 resp_depth; u8 sq_no_prefetch; u8 state; + int mlx_type; + struct list_head gid_list; }; struct mlx4_ib_srq { @@ -157,7 +166,14 @@ struct mlx4_ib_srq { struct mlx4_ib_ah { struct ib_ah ibah; - struct mlx4_av av; + union mlx4_ext_av av; +}; + +struct mlx4_ib_iboe { + spinlock_t lock; + struct net_device *netdevs[MLX4_MAX_PORTS]; + struct notifier_block nb; + union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; struct mlx4_ib_dev { @@ -176,6 +192,7 @@ struct mlx4_ib_dev { struct mutex cap_mask_mutex; bool ib_active; + struct mlx4_ib_iboe iboe; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) @@ -314,9 +331,20 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast, u8 port); + static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { - return !!(ah->av.g_slid & 0x80); + u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; + + if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) + return 1; + + return !!(ah->av.ib.g_slid & 0x80); } +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 1d27b9a8e2d..dca55b19a6f 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -226,7 +226,7 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device struct mlx4_ib_fast_reg_page_list *mfrpl; int size = page_list_len * sizeof (u64); - if (size > PAGE_SIZE) + if (page_list_len > MLX4_MAX_FAST_REG_PAGES) return ERR_PTR(-EINVAL); mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 6a60827b230..9a7794ac34c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -33,9 +33,11 @@ #include <linux/log2.h> #include <linux/slab.h> +#include <linux/netdevice.h> #include <rdma/ib_cache.h> #include <rdma/ib_pack.h> +#include <rdma/ib_addr.h> #include <linux/mlx4/qp.h> @@ -48,17 +50,26 @@ enum { enum { MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, - MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f + MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX4_IB_LINK_TYPE_IB = 0, + MLX4_IB_LINK_TYPE_ETH = 1 }; enum { /* - * Largest possible UD header: send with GRH and immediate data. + * Largest possible UD header: send with GRH and immediate + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) */ - MLX4_IB_UD_HEADER_SIZE = 72, + MLX4_IB_UD_HEADER_SIZE = 82, MLX4_IB_LSO_HEADER_SPARE = 128, }; +enum { + MLX4_IB_IBOE_ETHERTYPE = 0x8915 +}; + struct mlx4_ib_sqp { struct mlx4_ib_qp qp; int pkey_index; @@ -462,6 +473,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + INIT_LIST_HEAD(&qp->gid_list); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -649,6 +661,16 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re } } +static void del_gid_entries(struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + list_del(&ge->list); + kfree(ge); + } +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { @@ -695,6 +717,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (!qp->ibqp.srq) mlx4_db_free(dev->dev, &qp->db); } + + del_gid_entries(qp); } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, @@ -852,6 +876,14 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx4_qp_path *path, u8 port) { + int err; + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET; + u8 mac[6]; + int is_mcast; + u16 vlan_tag; + int vidx; + path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); if (ah->static_rate) { @@ -879,12 +911,49 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, memcpy(path->rgid, ah->grh.dgid.raw, 16); } - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + if (is_eth) { + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1); + + if (!(ah->ah_flags & IB_AH_GRH)) + return -1; + + err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port); + if (err) + return err; + + memcpy(path->dmac, mac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* use index 0 into MAC table for IBoE */ + path->grh_mylmc &= 0x80; + + vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + if (vlan_tag < 0x1000) { + if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) + return -ENOENT; + + path->vlan_index = vidx; + path->fl = 1 << 6; + } + } else + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); return 0; } +static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { + ge->added = 1; + ge->port = qp->port; + } + } +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -980,7 +1049,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - context->pri_path.ackto = attr->timeout << 3; + context->pri_path.ackto |= attr->timeout << 3; optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; } @@ -1118,8 +1187,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, qp->atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->resp_depth = attr->max_dest_rd_atomic; - if (attr_mask & IB_QP_PORT) + if (attr_mask & IB_QP_PORT) { qp->port = attr->port_num; + update_mcg_macs(dev, qp); + } if (attr_mask & IB_QP_ALT_PATH) qp->alt_port = attr->alt_port_num; @@ -1221,40 +1292,59 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; + int is_eth; + int is_vlan = 0; + int is_grh; + u16 vlan; send_size = 0; for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; - ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header); + is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_grh = mlx4_ib_ah_grh_present(ah); + if (is_eth) { + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + vlan = rdma_get_vlan_id(&sgid); + is_vlan = vlan < 0x1000; + } + ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); + + if (!is_eth) { + sqp->ud_header.lrh.service_level = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; + sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } - sqp->ud_header.lrh.service_level = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - sqp->ud_header.lrh.destination_lid = ah->av.dlid; - sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); - if (mlx4_ib_ah_grh_present(ah)) { + if (is_grh) { sqp->ud_header.grh.traffic_class = - (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; + (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = - ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); - sqp->ud_header.grh.hop_limit = ah->av.hop_limit; - ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, - ah->av.gid_index, &sqp->ud_header.grh.source_gid); + ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); memcpy(sqp->ud_header.grh.destination_gid.raw, - ah->av.dgid, 16); + ah->av.ib.dgid, 16); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); - mlx->rlid = sqp->ud_header.lrh.destination_lid; + + if (!is_eth) { + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + } switch (wr->opcode) { case IB_WR_SEND: @@ -1270,9 +1360,29 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) - sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + if (is_eth) { + u8 *smac; + + memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); + /* FIXME: cache smac value? */ + smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; + memcpy(sqp->ud_header.eth.smac_h, smac, 6); + if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) + mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); + if (!is_vlan) { + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + u16 pcp; + + sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13; + sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); + } + } else { + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + } sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); @@ -1429,11 +1539,14 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_send_wr *wr, __be16 *vlan) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + *vlan = dseg->vlan; } static void set_mlx_icrc_seg(void *dseg) @@ -1536,6 +1649,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; + __be16 vlan = cpu_to_be16(0xffff); spin_lock_irqsave(&qp->sq.lock, flags); @@ -1639,7 +1753,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, wr, &vlan); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1717,6 +1831,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; + if (be16_to_cpu(vlan) < 0x1000) { + ctrl->ins_vlan = 1 << 6; + ctrl->vlan_tag = vlan; + } + stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); @@ -1866,17 +1985,27 @@ static int to_ib_qp_access_flags(int mlx4_flags) return ib_flags; } -static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, +static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { + struct mlx4_dev *dev = ibdev->dev; + int is_eth; + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; + is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) + ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | + ((path->sched_queue & 4) << 1); + else + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->dlid = be16_to_cpu(path->rlid); - ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; @@ -1929,8 +2058,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); - to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index d2d172e6289..a34c9d38e82 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1493,7 +1493,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, int err; u16 pkey; - ib_ud_header_init(256, /* assume a MAD */ + ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, &sqp->ud_header); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6220d9d75b5..25ad0f9944c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1424,7 +1424,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, { int reset = 0; /* whether to send reset in case of err.. */ - int passive_state; atomic_inc(&cm_resets_recvd); nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." " refcnt=%d\n", cm_node, cm_node->state, @@ -1439,7 +1438,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, active_open_err(cm_node, skb, reset); break; case NES_CM_STATE_MPAREQ_RCVD: - passive_state = atomic_add_return(1, &cm_node->passive_state); + atomic_inc(&cm_node->passive_state); dev_kfree_skb_any(skb); break; case NES_CM_STATE_ESTABLISHED: diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 10560c796fd..3892e2c0e95 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -271,6 +271,7 @@ static int nes_netdev_stop(struct net_device *netdev) if (netif_msg_ifdown(nesvnic)) printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name); + netif_carrier_off(netdev); /* Disable network packets */ napi_disable(&nesvnic->napi); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 546fc22405f..99933e4e48f 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -476,9 +476,9 @@ static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list( } nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, " "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, " - "pbl.paddr= %p\n", pnesfrpl, &pnesfrpl->ibfrpl, + "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl, pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva, - (void *)pnesfrpl->nes_wqe_pbl.paddr); + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr); return pifrpl; } @@ -584,7 +584,9 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr props->lmc = 0; props->sm_lid = 0; props->sm_sl = 0; - if (nesvnic->linkup) + if (netif_queue_stopped(netdev)) + props->state = IB_PORT_DOWN; + else if (nesvnic->linkup) props->state = IB_PORT_ACTIVE; else props->state = IB_PORT_DOWN; @@ -3483,13 +3485,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++) dst_page_list[i] = cpu_to_le64(src_page_list[i]); - nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %p, " - "length: %d, rkey: %0x, pgl_paddr: %p, " + nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " + "length: %d, rkey: %0x, pgl_paddr: %llx, " "page_list_len: %u, wqe_misc: %x\n", - (void *)ib_wr->wr.fast_reg.iova_start, + (unsigned long long) ib_wr->wr.fast_reg.iova_start, ib_wr->wr.fast_reg.length, ib_wr->wr.fast_reg.rkey, - (void *)pnesfrpl->nes_wqe_pbl.paddr, + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, ib_wr->wr.fast_reg.page_list_len, wqe_misc); break; diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 61de0654820..64c9e7d02d4 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1406,7 +1406,7 @@ extern struct mutex qib_mutex; */ #define qib_early_err(dev, fmt, ...) \ do { \ - dev_info(dev, KERN_ERR QIB_DRV_NAME ": " fmt, ##__VA_ARGS__); \ + dev_err(dev, fmt, ##__VA_ARGS__); \ } while (0) #define qib_dev_err(dd, fmt, ...) \ diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a0e6613e8be..f99bddc0171 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -58,6 +58,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; @@ -554,13 +555,13 @@ bail: return ret; } -static int qibfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - qibfs_fill_super, mnt); - if (ret >= 0) - qib_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, qibfs_fill_super); + if (!IS_ERR(ret)) + qib_super = ret->d_sb; return ret; } @@ -602,7 +603,7 @@ int qibfs_remove(struct qib_devdata *dd) static struct file_system_type qibfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = qibfs_get_sb, + .mount = qibfs_mount, .kill_sb = qibfs_kill_super, }; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index f1d16d3a01f..f3b50393604 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1243,6 +1243,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev, qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot " "work if CONFIG_PCI_MSI is not enabled\n", ent->device); + dd = ERR_PTR(-ENODEV); #endif break; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 7fa6e559263..48b6674cbc4 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -103,16 +103,20 @@ int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); } else ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) + if (ret) { qib_early_err(&pdev->dev, "Unable to set DMA consistent mask: %d\n", ret); + goto bail; + } pci_set_master(pdev); ret = pci_enable_pcie_error_reporting(pdev); - if (ret) + if (ret) { qib_early_err(&pdev->dev, "Unable to enable pcie error reporting: %d\n", ret); + ret = 0; + } goto done; bail: diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index a0931119bd7..955fb715779 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -2068,7 +2068,10 @@ send_last: goto nack_op_err; if (!ret) goto rnr_nak; - goto send_last_imm; + wc.ex.imm_data = ohdr->u.rc.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; case OP(RDMA_READ_REQUEST): { struct qib_ack_entry *e; diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index b9c8b6346c1..32ccf3c824c 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -457,8 +457,10 @@ rdma_first: } if (opcode == OP(RDMA_WRITE_ONLY)) goto rdma_last; - else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) { + wc.ex.imm_data = ohdr->u.rc.imm_data; goto rdma_last_imm; + } /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ @@ -471,8 +473,8 @@ rdma_first: break; case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): -rdma_last_imm: wc.ex.imm_data = ohdr->u.imm_data; +rdma_last_imm: hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; |