summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/umem.c17
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c31
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h5
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h21
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c173
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c73
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h6
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c11
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c35
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c111
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c18
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c41
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c51
-rw-r--r--drivers/infiniband/hw/ehca/ehca_uverbs.c6
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c23
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c3
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c14
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c124
-rw-r--r--drivers/infiniband/hw/mlx4/main.c3
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h33
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c3
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c8
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c13
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c20
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h10
-rw-r--r--drivers/infiniband/hw/nes/Kconfig1
-rw-r--r--drivers/infiniband/hw/nes/nes.c19
-rw-r--r--drivers/infiniband/hw/nes/nes.h5
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c35
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c391
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h21
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c198
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c14
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c12
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h29
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c215
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c22
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c55
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c4
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c4
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h7
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c9
55 files changed, 1237 insertions, 701 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4e3128ff73c..fe78f7d2509 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -38,6 +38,7 @@
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
+#include <linux/dma-attrs.h>
#include "uverbs.h"
@@ -72,9 +73,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
+ * @dmasync: flush in-flight DMA when the memory region is written
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
- size_t size, int access)
+ size_t size, int access, int dmasync)
{
struct ib_umem *umem;
struct page **page_list;
@@ -87,6 +89,10 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
int ret;
int off;
int i;
+ DEFINE_DMA_ATTRS(attrs);
+
+ if (dmasync)
+ dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
if (!can_do_mlock())
return ERR_PTR(-EPERM);
@@ -174,10 +180,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
}
- chunk->nmap = ib_dma_map_sg(context->device,
- &chunk->page_list[0],
- chunk->nents,
- DMA_BIDIRECTIONAL);
+ chunk->nmap = ib_dma_map_sg_attrs(context->device,
+ &chunk->page_list[0],
+ chunk->nents,
+ DMA_BIDIRECTIONAL,
+ &attrs);
if (chunk->nmap <= 0) {
for (i = 0; i < chunk->nents; ++i)
put_page(sg_page(&chunk->page_list[i]));
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 6af2c0f79a6..2acf9b62cf9 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -452,7 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd;
- c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(c2mr->umem)) {
err = PTR_ERR(c2mr->umem);
kfree(c2mr);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 66eb7030aea..5fd8506a865 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -359,9 +359,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
cq->sw_wptr++;
}
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
u32 ptr;
+ int flushed = 0;
PDBG("%s wq %p cq %p\n", __func__, wq, cq);
@@ -369,8 +370,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
- while (ptr++ != wq->rq_wptr)
+ while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
+ flushed++;
+ }
+ return flushed;
}
static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
@@ -394,9 +398,10 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
cq->sw_wptr++;
}
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
__u32 ptr;
+ int flushed = 0;
struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
ptr = wq->sq_rptr + count;
@@ -405,7 +410,9 @@ void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
insert_sq_cqe(wq, cq, sqp);
sqp++;
ptr++;
+ flushed++;
}
+ return flushed;
}
/*
@@ -456,7 +463,8 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
ptr = cq->sw_rptr;
while (!Q_EMPTY(ptr, cq->sw_wptr)) {
cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+ if ((SQ_TYPE(*cqe) ||
+ ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
(CQE_QPID(*cqe) == wq->qpid))
(*count)++;
ptr++;
@@ -829,7 +837,8 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
wqe->mpaattrs = attr->mpaattrs;
wqe->qpcaps = attr->qpcaps;
wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
- wqe->flags = cpu_to_be32(attr->flags);
+ wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+ wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
wqe->ord = cpu_to_be32(attr->ord);
wqe->ird = cpu_to_be32(attr->ird);
wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
@@ -1135,6 +1144,18 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
/*
+ * If this is an unsolicited read response, then the read
+ * was generated by the kernel driver as part of peer-2-peer
+ * connection setup. So ignore the completion.
+ */
+ if (!wq->oldest_read) {
+ if (CQE_STATUS(*hw_cqe))
+ wq->error = 1;
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /*
* Don't write to the HWCQ, so create a new read req CQE
* in local memory.
*/
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 99543d63470..69ab08ebc68 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -53,6 +53,7 @@
#define T3_MAX_PBL_SIZE 256
#define T3_MAX_RQ_SIZE 1024
#define T3_MAX_NUM_STAG (1<<15)
+#define T3_MAX_MR_SIZE 0x100000000ULL
#define T3_STAG_UNSET 0xffffffff
@@ -172,8 +173,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
int __init cxio_hal_init(void);
void __exit cxio_hal_exit(void);
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_flush_hw_cq(struct t3_cq *cq);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 969d4d92845..f1a25a821a4 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -278,6 +278,17 @@ enum t3_qp_caps {
uP_RI_QP_STAG0_ENABLE = 0x10
} __attribute__ ((packed));
+enum rdma_init_rtr_types {
+ RTR_READ = 1,
+ RTR_WRITE = 2,
+ RTR_SEND = 3,
+};
+
+#define S_RTR_TYPE 2
+#define M_RTR_TYPE 0x3
+#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
struct t3_rdma_init_attr {
u32 tid;
u32 qpid;
@@ -293,7 +304,9 @@ struct t3_rdma_init_attr {
u32 ird;
u64 qp_dma_addr;
u32 qp_dma_size;
- u32 flags;
+ enum rdma_init_rtr_types rtr_type;
+ u16 flags;
+ u16 rqe_count;
u32 irs;
};
@@ -309,8 +322,8 @@ struct t3_rdma_init_wr {
u8 mpaattrs; /* 5 */
u8 qpcaps;
__be16 ulpdu_size;
- __be32 flags; /* bits 31-1 - reservered */
- /* bit 0 - set if RECV posted */
+ __be16 flags_rtr_type;
+ __be16 rqe_count;
__be32 ord; /* 6 */
__be32 ird;
__be64 qp_dma_addr; /* 7 */
@@ -324,7 +337,7 @@ struct t3_genbit {
};
enum rdma_init_wr_flags {
- RECVS_POSTED = (1<<0),
+ MPA_INITIATOR = (1<<0),
PRIV_QP = (1<<1),
};
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 6ba4138c8ec..71554eacb13 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -83,6 +83,7 @@ static void rnic_init(struct iwch_dev *rnicp)
rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
+ rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
rnicp->attr.can_resize_wq = 0;
rnicp->attr.max_rdma_reads_per_qp = 8;
rnicp->attr.max_rdma_read_resources =
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 9ad9b1e7c8c..d2409a505e8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -66,6 +66,7 @@ struct iwch_rnic_attributes {
* size (4k)^i. Phys block list mode unsupported.
*/
u32 mem_pgsizes_bitmask;
+ u64 max_mr_size;
u8 can_resize_wq;
/*
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 72ca360c3db..c325c44807e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -63,10 +63,14 @@ static char *states[] = {
NULL,
};
-static int ep_timeout_secs = 10;
+int peer2peer = 0;
+module_param(peer2peer, int, 0644);
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+
+static int ep_timeout_secs = 60;
module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
- "in seconds (default=10)");
+ "in seconds (default=60)");
static int mpa_rev = 1;
module_param(mpa_rev, int, 0644);
@@ -125,6 +129,12 @@ static void start_ep_timer(struct iwch_ep *ep)
static void stop_ep_timer(struct iwch_ep *ep)
{
PDBG("%s ep %p\n", __func__, ep);
+ if (!timer_pending(&ep->timer)) {
+ printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ WARN_ON(1);
+ return;
+ }
del_timer_sync(&ep->timer);
put_ep(&ep->com);
}
@@ -508,7 +518,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
skb_reset_transport_header(skb);
len = skb->len;
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(len);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -559,7 +569,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
set_arp_failure_handler(skb, arp_failure_discard);
skb_reset_transport_header(skb);
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(mpalen);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -611,7 +621,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
skb_reset_transport_header(skb);
len = skb->len;
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
req->len = htonl(len);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -879,6 +889,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
* the MPA header is valid.
*/
state_set(&ep->com, FPDU_MODE);
+ ep->mpa_attr.initiator = 1;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -901,8 +912,14 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
/* bind QP and TID with INIT_WR */
err = iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1);
- if (!err)
- goto out;
+ if (err)
+ goto err;
+
+ if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
+ iwch_post_zb_read(ep->com.qp);
+ }
+
+ goto out;
err:
abort_connection(ep, skb, GFP_KERNEL);
out:
@@ -995,6 +1012,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
* If we get here we have accumulated the entire mpa
* start reply message including private data.
*/
+ ep->mpa_attr.initiator = 0;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1065,17 +1083,33 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
PDBG("%s ep %p credits %u\n", __func__, ep, credits);
- if (credits == 0)
+ if (credits == 0) {
+ PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
return CPL_RET_BUF_DONE;
+ }
+
BUG_ON(credits != 1);
- BUG_ON(ep->mpa_skb == NULL);
- kfree_skb(ep->mpa_skb);
- ep->mpa_skb = NULL;
dst_confirm(ep->dst);
- if (state_read(&ep->com) == MPA_REP_SENT) {
- ep->com.rpl_done = 1;
- PDBG("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
+ if (!ep->mpa_skb) {
+ PDBG("%s rdma_init wr_ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ if (ep->mpa_attr.initiator) {
+ PDBG("%s initiator ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ if (peer2peer)
+ iwch_post_zb_read(ep->com.qp);
+ } else {
+ PDBG("%s responder ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+ }
+ } else {
+ PDBG("%s lsm ack ep %p state %u freeing skb\n",
+ __func__, ep, state_read(&ep->com));
+ kfree_skb(ep->mpa_skb);
+ ep->mpa_skb = NULL;
}
return CPL_RET_BUF_DONE;
}
@@ -1083,8 +1117,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
struct iwch_ep *ep = ctx;
+ unsigned long flags;
+ int release = 0;
PDBG("%s ep %p\n", __func__, ep);
+ BUG_ON(!ep);
/*
* We get 2 abort replies from the HW. The first one must
@@ -1095,9 +1132,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE;
}
- close_complete_upcall(ep);
- state_set(&ep->com, DEAD);
- release_ep_resources(ep);
+ spin_lock_irqsave(&ep->com.lock, flags);
+ switch (ep->com.state) {
+ case ABORTING:
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ default:
+ printk(KERN_ERR "%s ep %p state %d\n",
+ __func__, ep, ep->com.state);
+ break;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+
+ if (release)
+ release_ep_resources(ep);
return CPL_RET_BUF_DONE;
}
@@ -1470,7 +1520,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct sk_buff *rpl_skb;
struct iwch_qp_attributes attrs;
int ret;
- int state;
+ int release = 0;
+ unsigned long flags;
if (is_neg_adv_abort(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
@@ -1488,9 +1539,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE;
}
- state = state_read(&ep->com);
- PDBG("%s ep %p state %u\n", __func__, ep, state);
- switch (state) {
+ spin_lock_irqsave(&ep->com.lock, flags);
+ PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+ switch (ep->com.state) {
case CONNECTING:
break;
case MPA_REQ_WAIT:
@@ -1536,21 +1587,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
break;
case DEAD:
PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ spin_unlock_irqrestore(&ep->com.lock, flags);
return CPL_RET_BUF_DONE;
default:
BUG_ON(1);
break;
}
dst_confirm(ep->dst);
+ if (ep->com.state != ABORTING) {
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
if (!rpl_skb) {
printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
__func__);
- dst_release(ep->dst);
- l2t_release(L2DATA(ep->com.tdev), ep->l2t);
- put_ep(&ep->com);
- return CPL_RET_BUF_DONE;
+ release = 1;
+ goto out;
}
rpl_skb->priority = CPL_PRIORITY_DATA;
rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
@@ -1559,10 +1614,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
rpl->cmd = CPL_ABORT_NO_RST;
cxgb3_ofld_send(ep->com.tdev, rpl_skb);
- if (state != ABORTING) {
- state_set(&ep->com, DEAD);
+out:
+ if (release)
release_ep_resources(ep);
- }
return CPL_RET_BUF_DONE;
}
@@ -1596,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
release = 1;
break;
case ABORTING:
- break;
case DEAD:
+ break;
default:
BUG_ON(1);
break;
@@ -1661,15 +1715,18 @@ static void ep_timeout(unsigned long arg)
struct iwch_ep *ep = (struct iwch_ep *)arg;
struct iwch_qp_attributes attrs;
unsigned long flags;
+ int abort = 1;
spin_lock_irqsave(&ep->com.lock, flags);
PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
ep->com.state);
switch (ep->com.state) {
case MPA_REQ_SENT:
+ __state_set(&ep->com, ABORTING);
connect_reply_upcall(ep, -ETIMEDOUT);
break;
case MPA_REQ_WAIT:
+ __state_set(&ep->com, ABORTING);
break;
case CLOSING:
case MORIBUND:
@@ -1679,13 +1736,17 @@ static void ep_timeout(unsigned long arg)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
+ __state_set(&ep->com, ABORTING);
break;
default:
- BUG();
+ printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ WARN_ON(1);
+ abort = 0;
}
- __state_set(&ep->com, CLOSING);
spin_unlock_irqrestore(&ep->com.lock, flags);
- abort_connection(ep, NULL, GFP_ATOMIC);
+ if (abort)
+ abort_connection(ep, NULL, GFP_ATOMIC);
put_ep(&ep->com);
}
@@ -1762,16 +1823,19 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (err)
goto err;
+ /* if needed, wait for wr_ack */
+ if (iwch_rqes_posted(qp)) {
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ err = ep->com.rpl_err;
+ if (err)
+ goto err;
+ }
+
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
goto err;
- /* wait for wr_ack */
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (err)
- goto err;
state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
@@ -1968,40 +2032,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
states[ep->com.state], abrupt);
- if (ep->com.state == DEAD) {
- PDBG("%s already dead ep %p\n", __func__, ep);
- goto out;
- }
-
- if (abrupt) {
- if (ep->com.state != ABORTING) {
- ep->com.state = ABORTING;
- close = 1;
- }
- goto out;
- }
-
switch (ep->com.state) {
case MPA_REQ_WAIT:
case MPA_REQ_SENT:
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
- start_ep_timer(ep);
- ep->com.state = CLOSING;
close = 1;
+ if (abrupt)
+ ep->com.state = ABORTING;
+ else {
+ ep->com.state = CLOSING;
+ start_ep_timer(ep);
+ }
break;
case CLOSING:
- ep->com.state = MORIBUND;
close = 1;
+ if (abrupt) {
+ stop_ep_timer(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
break;
case MORIBUND:
+ case ABORTING:
+ case DEAD:
+ PDBG("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
BUG();
break;
}
-out:
+
spin_unlock_irqrestore(&ep->com.lock, flags);
if (close) {
if (abrupt)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 2bb7fbdb3ff..d7c7e09f099 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -56,6 +56,7 @@
#define put_ep(ep) { \
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
ep, atomic_read(&((ep)->kref.refcount))); \
+ WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
kref_put(&((ep)->kref), __free_ep); \
}
@@ -225,5 +226,6 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, st
int __init iwch_cm_init(void);
void __exit iwch_cm_term(void);
+extern int peer2peer;
#endif /* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ab4695c1dd5..d07d3a377b5 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -602,7 +602,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
- mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
@@ -998,7 +998,7 @@ static int iwch_query_device(struct ib_device *ibdev,
props->device_cap_flags = dev->device_cap_flags;
props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
- props->max_mr_size = ~0ull;
+ props->max_mr_size = dev->attr.max_mr_size;
props->max_qp = dev->attr.max_qps;
props->max_qp_wr = dev->attr.max_wrs;
props->max_sge = dev->attr.max_sge_per_wr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 61356f91109..db5100d27ca 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -118,6 +118,7 @@ enum IWCH_QP_FLAGS {
};
struct iwch_mpa_attributes {
+ u8 initiator;
u8 recv_marker_enabled;
u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
u8 crc_enabled;
@@ -322,6 +323,7 @@ enum iwch_qp_query_flags {
IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
};
+u16 iwch_rqes_posted(struct iwch_qp *qhp);
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
@@ -331,6 +333,7 @@ int iwch_bind_mw(struct ib_qp *qp,
struct ib_mw_bind *mw_bind);
int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_post_zb_read(struct iwch_qp *qhp);
int iwch_register_device(struct iwch_dev *dev);
void iwch_unregister_device(struct iwch_dev *dev);
int iwch_quiesce_qps(struct iwch_cq *chp);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 8891c3b0a3d..79dbe5beae5 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -586,6 +586,36 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
}
}
+int iwch_post_zb_read(struct iwch_qp *qhp)
+{
+ union t3_wr *wqe;
+ struct sk_buff *skb;
+ u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+
+ PDBG("%s enter\n", __func__);
+ skb = alloc_skb(40, GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+ return -ENOMEM;
+ }
+ wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
+ memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+ wqe->read.rdmaop = T3_READ_REQ;
+ wqe->read.reserved[0] = 0;
+ wqe->read.reserved[1] = 0;
+ wqe->read.reserved[2] = 0;
+ wqe->read.rem_stag = cpu_to_be32(1);
+ wqe->read.rem_to = cpu_to_be64(1);
+ wqe->read.local_stag = cpu_to_be32(1);
+ wqe->read.local_len = cpu_to_be32(0);
+ wqe->read.local_to = cpu_to_be64(1);
+ wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
+ wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+ V_FW_RIWR_LEN(flit_cnt));
+ skb->priority = CPL_PRIORITY_DATA;
+ return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
/*
* This posts a TERMINATE with layer=RDMA, type=catastrophic.
*/
@@ -625,6 +655,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
{
struct iwch_cq *rchp, *schp;
int count;
+ int flushed;
rchp = get_chp(qhp->rhp, qhp->attr.rcq);
schp = get_chp(qhp->rhp, qhp->attr.scq);
@@ -639,20 +670,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&rchp->cq);
cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
- cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&rchp->lock, *flag);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ if (flushed)
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
/* locking heirarchy: cq lock first, then qp lock. */
spin_lock_irqsave(&schp->lock, *flag);
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&schp->cq);
cxio_count_scqes(&schp->cq, &qhp->wq, &count);
- cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
spin_unlock(&qhp->lock);
spin_unlock_irqrestore(&schp->lock, *flag);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+ if (flushed)
+ (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
/* deref */
if (atomic_dec_and_test(&qhp->refcnt))
@@ -671,11 +704,18 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
/*
- * Return non zero if at least one RECV was pre-posted.
+ * Return count of RECV WRs posted
*/
-static int rqes_posted(struct iwch_qp *qhp)
+u16 iwch_rqes_posted(struct iwch_qp *qhp)
{
- return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+ union t3_wr *wqe = qhp->wq.queue;
+ u16 count = 0;
+ while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+ count++;
+ wqe++;
+ }
+ PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+ return count;
}
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
@@ -716,8 +756,17 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
init_attr.ird = qhp->attr.max_ird;
init_attr.qp_dma_addr = qhp->wq.dma_addr;
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
- init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+ init_attr.rqe_count = iwch_rqes_posted(qhp);
+ init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
+ if (peer2peer) {
+ init_attr.rtr_type = RTR_READ;
+ if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
+ init_attr.ord = 1;
+ if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
+ init_attr.ird = 1;
+ } else
+ init_attr.rtr_type = 0;
init_attr.irs = qhp->ep->rcv_seq;
PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
"flags 0x%x qpcaps 0x%x\n", __func__,
@@ -832,8 +881,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=0;
disconnect = 1;
ep = qhp->ep;
+ get_ep(&ep->com);
}
- flush_qp(qhp, &flag);
break;
case IWCH_QP_STATE_TERMINATE:
qhp->attr.state = IWCH_QP_STATE_TERMINATE;
@@ -848,6 +897,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=1;
disconnect = 1;
ep = qhp->ep;
+ get_ep(&ep->com);
}
goto err;
break;
@@ -863,6 +913,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
}
switch (attrs->next_state) {
case IWCH_QP_STATE_IDLE:
+ flush_qp(qhp, &flag);
qhp->attr.state = IWCH_QP_STATE_IDLE;
qhp->attr.llp_stream_handle = NULL;
put_ep(&qhp->ep->com);
@@ -929,8 +980,10 @@ out:
* on the EP. This can be a normal close (RTS->CLOSING) or
* an abnormal close (RTS/CLOSING->ERROR).
*/
- if (disconnect)
+ if (disconnect) {
iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+ put_ep(&ep->com);
+ }
/*
* If free is 1, then we've disassociated the EP from the QP
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 0d13fe0a260..00bab60f6de 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -66,6 +66,7 @@ struct ehca_av;
#include "ehca_irq.h"
#define EHCA_EQE_CACHE_SIZE 20
+#define EHCA_MAX_NUM_QUEUES 0xffff
struct ehca_eqe_cache_entry {
struct ehca_eqe *eqe;
@@ -127,6 +128,8 @@ struct ehca_shca {
/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
u32 hca_cap_mr_pgsize;
int max_mtu;
+ atomic_t num_cqs;
+ atomic_t num_qps;
};
struct ehca_pd {
@@ -160,6 +163,7 @@ struct ehca_qp {
};
u32 qp_type;
enum ehca_ext_qp_type ext_type;
+ enum ib_qp_state state;
struct ipz_queue ipz_squeue;
struct ipz_queue ipz_rqueue;
struct h_galpas galpas;
@@ -343,6 +347,8 @@ extern int ehca_use_hp_mr;
extern int ehca_scaling_code;
extern int ehca_lock_hcalls;
extern int ehca_nr_ports;
+extern int ehca_max_cq;
+extern int ehca_max_qp;
struct ipzu_queue_resp {
u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index ec0cfcf3073..5540b276a33 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -132,10 +132,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
return ERR_PTR(-EINVAL);
+ if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) {
+ ehca_err(device, "Unable to create CQ, max number of %i "
+ "CQs reached.", ehca_max_cq);
+ ehca_err(device, "To increase the maximum number of CQs "
+ "use the number_of_cqs module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
if (!my_cq) {
ehca_err(device, "Out of memory for ehca_cq struct device=%p",
device);
+ atomic_dec(&shca->num_cqs);
return ERR_PTR(-ENOMEM);
}
@@ -305,6 +314,7 @@ create_cq_exit2:
create_cq_exit1:
kmem_cache_free(cq_cache, my_cq);
+ atomic_dec(&shca->num_cqs);
return cq;
}
@@ -359,6 +369,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
ipz_queue_dtor(NULL, &my_cq->ipz_queue);
kmem_cache_free(cq_cache, my_cq);
+ atomic_dec(&shca->num_cqs);
return 0;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index b4ac617a70e..49660dfa186 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -54,7 +54,8 @@ int ehca_create_eq(struct ehca_shca *shca,
struct ehca_eq *eq,
const enum ehca_eq_type type, const u32 length)
{
- u64 ret;
+ int ret;
+ u64 h_ret;
u32 nr_pages;
u32 i;
void *vpage;
@@ -73,15 +74,15 @@ int ehca_create_eq(struct ehca_shca *shca,
return -EINVAL;
}
- ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
- &eq->pf,
- type,
- length,
- &eq->ipz_eq_handle,
- &eq->length,
- &nr_pages, &eq->ist);
+ h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+ &eq->pf,
+ type,
+ length,
+ &eq->ipz_eq_handle,
+ &eq->length,
+ &nr_pages, &eq->ist);
- if (ret != H_SUCCESS) {
+ if (h_ret != H_SUCCESS) {
ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
return -EINVAL;
}
@@ -97,24 +98,22 @@ int ehca_create_eq(struct ehca_shca *shca,
u64 rpage;
vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
- if (!vpage) {
- ret = H_RESOURCE;
+ if (!vpage)
goto create_eq_exit2;
- }
rpage = virt_to_abs(vpage);
- ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
- eq->ipz_eq_handle,
- &eq->pf,
- 0, 0, rpage, 1);
+ h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+ eq->ipz_eq_handle,
+ &eq->pf,
+ 0, 0, rpage, 1);
if (i == (nr_pages - 1)) {
/* last page */
vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
- if (ret != H_SUCCESS || vpage)
+ if (h_ret != H_SUCCESS || vpage)
goto create_eq_exit2;
} else {
- if (ret != H_PAGE_REGISTERED || !vpage)
+ if (h_ret != H_PAGE_REGISTERED || !vpage)
goto create_eq_exit2;
}
}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 2515cbde7e6..bc3b37d2070 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
props->max_ee = limit_uint(rblock->max_rd_ee_context);
props->max_rdd = limit_uint(rblock->max_rd_domain);
props->max_fmr = limit_uint(rblock->max_mr);
- props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay);
props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp);
props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context);
props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
@@ -115,7 +114,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
}
props->max_pkeys = 16;
- props->local_ca_ack_delay = limit_uint(rblock->local_ca_ack_delay);
+ props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255);
props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
@@ -136,7 +135,7 @@ query_device1:
return ret;
}
-static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
{
switch (fw_mtu) {
case 0x1:
@@ -156,7 +155,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
}
}
-static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
+static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
{
switch (vl_cap) {
case 0x1:
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index b5ca94c6b8d..ca5eb0cb628 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -633,7 +633,7 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
unsigned long flags;
WARN_ON_ONCE(!in_interrupt());
- if (ehca_debug_level)
+ if (ehca_debug_level >= 3)
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
spin_lock_irqsave(&pool->last_cpu_lock, flags);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 65b3362cdb9..482103eb6ea 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -50,7 +50,7 @@
#include "ehca_tools.h"
#include "hcp_if.h"
-#define HCAD_VERSION "0025"
+#define HCAD_VERSION "0026"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
@@ -60,7 +60,6 @@ MODULE_VERSION(HCAD_VERSION);
static int ehca_open_aqp1 = 0;
static int ehca_hw_level = 0;
static int ehca_poll_all_eqs = 1;
-static int ehca_mr_largepage = 1;
int ehca_debug_level = 0;
int ehca_nr_ports = 2;
@@ -69,47 +68,52 @@ int ehca_port_act_time = 30;
int ehca_static_rate = -1;
int ehca_scaling_code = 0;
int ehca_lock_hcalls = -1;
-
-module_param_named(open_aqp1, ehca_open_aqp1, int, S_IRUGO);
-module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
-module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
-module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
-module_param_named(use_hp_mr, ehca_use_hp_mr, int, S_IRUGO);
-module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
-module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, S_IRUGO);
-module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
-module_param_named(scaling_code, ehca_scaling_code, int, S_IRUGO);
-module_param_named(mr_largepage, ehca_mr_largepage, int, S_IRUGO);
+int ehca_max_cq = -1;
+int ehca_max_qp = -1;
+
+module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO);
+module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
+module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
+module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
+module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO);
+module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
+module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
+module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
+module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO);
+module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
+module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
MODULE_PARM_DESC(open_aqp1,
- "AQP1 on startup (0: no (default), 1: yes)");
+ "Open AQP1 on startup (default: no)");
MODULE_PARM_DESC(debug_level,
- "debug level"
- " (0: no debug traces (default), 1: with debug traces)");
+ "Amount of debug output (0: none (default), 1: traces, "
+ "2: some dumps, 3: lots)");
MODULE_PARM_DESC(hw_level,
- "hardware level"
- " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+ "Hardware level (0: autosensing (default), "
+ "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
MODULE_PARM_DESC(nr_ports,
"number of connected ports (-1: autodetect, 1: port one only, "
"2: two ports (default)");
MODULE_PARM_DESC(use_hp_mr,
- "high performance MRs (0: no (default), 1: yes)");
+ "Use high performance MRs (default: no)");
MODULE_PARM_DESC(port_act_time,
- "time to wait for port activation (default: 30 sec)");
+ "Time to wait for port activation (default: 30 sec)");
MODULE_PARM_DESC(poll_all_eqs,
- "polls all event queues periodically"
- " (0: no, 1: yes (default))");
+ "Poll all event queues periodically (default: yes)");
MODULE_PARM_DESC(static_rate,
- "set permanent static rate (default: disabled)");
+ "Set permanent static rate (default: no static rate)");
MODULE_PARM_DESC(scaling_code,
- "set scaling code (0: disabled/default, 1: enabled)");
-MODULE_PARM_DESC(mr_largepage,
- "use large page for MR (0: use PAGE_SIZE (default), "
- "1: use large page depending on MR size");
+ "Enable scaling code (default: no)");
MODULE_PARM_DESC(lock_hcalls,
- "serialize all hCalls made by the driver "
+ "Serialize all hCalls made by the driver "
"(default: autodetect)");
+MODULE_PARM_DESC(number_of_cqs,
+ "Max number of CQs which can be allocated "
+ "(default: autodetect)");
+MODULE_PARM_DESC(number_of_qps,
+ "Max number of QPs which can be allocated "
+ "(default: autodetect)");
DEFINE_RWLOCK(ehca_qp_idr_lock);
DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -275,6 +279,7 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
u64 h_ret;
struct hipz_query_hca *rblock;
struct hipz_query_port *port;
+ const char *loc_code;
static const u32 pgsize_map[] = {
HCA_CAP_MR_PGSIZE_4K, 0x1000,
@@ -283,6 +288,12 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
HCA_CAP_MR_PGSIZE_16M, 0x1000000,
};
+ ehca_gen_dbg("Probing adapter %s...",
+ shca->ofdev->node->full_name);
+ loc_code = of_get_property(shca->ofdev->node, "ibm,loc-code", NULL);
+ if (loc_code)
+ ehca_gen_dbg(" ... location lode=%s", loc_code);
+
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_gen_err("Cannot allocate rblock memory.");
@@ -350,10 +361,27 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
/* translate supported MR page sizes; always support 4K */
shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
- if (ehca_mr_largepage) { /* support extra sizes only if enabled */
- for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
- if (rblock->memory_page_size_supported & pgsize_map[i])
- shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+ for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+ if (rblock->memory_page_size_supported & pgsize_map[i])
+ shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+
+ /* Set maximum number of CQs and QPs to calculate EQ size */
+ if (ehca_max_qp == -1)
+ ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES);
+ else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) {
+ ehca_gen_err("Requested number of QPs is out of range (1 - %i) "
+ "specified by HW", rblock->max_qp);
+ ret = -EINVAL;
+ goto sense_attributes1;
+ }
+
+ if (ehca_max_cq == -1)
+ ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES);
+ else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) {
+ ehca_gen_err("Requested number of CQs is out of range (1 - %i) "
+ "specified by HW", rblock->max_cq);
+ ret = -EINVAL;
+ goto sense_attributes1;
}
/* query max MTU from first port -- it's the same for all ports */
@@ -567,8 +595,7 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n",
- ehca_debug_level);
+ return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
}
static ssize_t ehca_store_debug_level(struct device_driver *ddp,
@@ -657,14 +684,6 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
}
static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
-static ssize_t ehca_show_mr_largepage(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%d\n", ehca_mr_largepage);
-}
-static DEVICE_ATTR(mr_largepage, S_IRUGO, ehca_show_mr_largepage, NULL);
-
static struct attribute *ehca_dev_attrs[] = {
&dev_attr_adapter_handle.attr,
&dev_attr_num_ports.attr,
@@ -681,7 +700,6 @@ static struct attribute *ehca_dev_attrs[] = {
&dev_attr_cur_mw.attr,
&dev_attr_max_pd.attr,
&dev_attr_max_ah.attr,
- &dev_attr_mr_largepage.attr,
NULL
};
@@ -695,7 +713,7 @@ static int __devinit ehca_probe(struct of_device *dev,
struct ehca_shca *shca;
const u64 *handle;
struct ib_pd *ibpd;
- int ret, i;
+ int ret, i, eq_size;
handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
if (!handle) {
@@ -716,6 +734,8 @@ static int __devinit ehca_probe(struct of_device *dev,
return -ENOMEM;
}
mutex_init(&shca->modify_mutex);
+ atomic_set(&shca->num_cqs, 0);
+ atomic_set(&shca->num_qps, 0);
for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
spin_lock_init(&shca->sport[i].mod_sqp_lock);
@@ -735,8 +755,9 @@ static int __devinit ehca_probe(struct of_device *dev,
goto probe1;
}
+ eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp;
/* create event queues */
- ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+ ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
if (ret) {
ehca_err(&shca->ib_device, "Cannot create EQ.");
goto probe1;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index f26997fc00f..f974367cad4 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -323,7 +323,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
- mr_access_flags);
+ mr_access_flags, 0);
if (IS_ERR(e_mr->umem)) {
ib_mr = (void *)e_mr->umem;
goto reg_user_mr_exit1;
@@ -1794,8 +1794,9 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
int t;
for (t = start_idx; t <= end_idx; t++) {
u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
- ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
- *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
+ if (ehca_debug_level >= 3)
+ ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
+ *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
ehca_gen_err("uncontiguous page found pgaddr=%lx "
"prev_pgaddr=%lx page_list_i=%x",
@@ -1862,10 +1863,13 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
pgaddr &
~(pginfo->hwpage_size - 1));
}
- ehca_gen_dbg("kpage=%lx chunk_page=%lx "
- "value=%016lx", *kpage, pgaddr,
- *(u64 *)abs_to_virt(
- phys_to_abs(pgaddr)));
+ if (ehca_debug_level >= 3) {
+ u64 val = *(u64 *)abs_to_virt(
+ phys_to_abs(pgaddr));
+ ehca_gen_dbg("kpage=%lx chunk_page=%lx "
+ "value=%016lx",
+ *kpage, pgaddr, val);
+ }
prev_pgaddr = pgaddr;
i++;
pginfo->kpage_cnt++;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 3eb14a52cbf..18fba92fa7a 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -421,8 +421,18 @@ static struct ehca_qp *internal_create_qp(
u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
unsigned long flags;
- if (init_attr->create_flags)
+ if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) {
+ ehca_err(pd->device, "Unable to create QP, max number of %i "
+ "QPs reached.", ehca_max_qp);
+ ehca_err(pd->device, "To increase the maximum number of QPs "
+ "use the number_of_qps module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ if (init_attr->create_flags) {
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
+ }
memset(&parms, 0, sizeof(parms));
qp_type = init_attr->qp_type;
@@ -431,6 +441,7 @@ static struct ehca_qp *internal_create_qp(
init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
init_attr->sq_sig_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
@@ -455,6 +466,7 @@ static struct ehca_qp *internal_create_qp(
if (is_llqp && has_srq) {
ehca_err(pd->device, "LLQPs can't have an SRQ");
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
@@ -466,6 +478,7 @@ static struct ehca_qp *internal_create_qp(
ehca_err(pd->device, "no more than three SGEs "
"supported for SRQ pd=%p max_sge=%x",
pd, init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
}
@@ -477,6 +490,7 @@ static struct ehca_qp *internal_create_qp(
qp_type != IB_QPT_SMI &&
qp_type != IB_QPT_GSI) {
ehca_err(pd->device, "wrong QP Type=%x", qp_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
@@ -490,6 +504,7 @@ static struct ehca_qp *internal_create_qp(
"or max_rq_wr=%x for RC LLQP",
init_attr->cap.max_send_wr,
init_attr->cap.max_recv_wr);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
break;
@@ -497,6 +512,7 @@ static struct ehca_qp *internal_create_qp(
if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
ehca_err(pd->device, "UD LLQP not supported "
"by this adapter");
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-ENOSYS);
}
if (!(init_attr->cap.max_send_sge <= 5
@@ -508,20 +524,22 @@ static struct ehca_qp *internal_create_qp(
"or max_recv_sge=%x for UD LLQP",
init_attr->cap.max_send_sge,
init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
} else if (init_attr->cap.max_send_wr > 255) {
ehca_err(pd->device,
"Invalid Number of "
"max_send_wr=%x for UD QP_TYPE=%x",
init_attr->cap.max_send_wr, qp_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
break;
default:
ehca_err(pd->device, "unsupported LL QP Type=%x",
qp_type);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
- break;
}
} else {
int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
@@ -533,6 +551,7 @@ static struct ehca_qp *internal_create_qp(
"send_sge=%x recv_sge=%x max_sge=%x",
init_attr->cap.max_send_sge,
init_attr->cap.max_recv_sge, max_sge);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-EINVAL);
}
}
@@ -543,6 +562,7 @@ static struct ehca_qp *internal_create_qp(
my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
if (!my_qp) {
ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(-ENOMEM);
}
@@ -550,6 +570,7 @@ static struct ehca_qp *internal_create_qp(
spin_lock_init(&my_qp->spinlock_r);
my_qp->qp_type = qp_type;
my_qp->ext_type = parms.ext_type;
+ my_qp->state = IB_QPS_RESET;
if (init_attr->recv_cq)
my_qp->recv_cq =
@@ -822,6 +843,7 @@ create_qp_exit1:
create_qp_exit0:
kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
return ERR_PTR(ret);
}
@@ -965,7 +987,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
qp_num, bad_send_wqe_p);
/* convert wqe pointer to vadr */
bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
squeue = &my_qp->ipz_squeue;
if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
@@ -978,7 +1000,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
*bad_wqe_cnt = 0;
while (wqe->optype != 0xff && wqe->wqef != 0xff) {
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
wqe->nr_of_data_seg = 0; /* suppress data access */
wqe->wqef = WQEF_PURGE; /* WQE to be purged */
@@ -1450,7 +1472,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
/* no support for max_send/recv_sge yet */
}
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
@@ -1508,6 +1530,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_QKEY)
my_qp->qkey = attr->qkey;
+ my_qp->state = qp_new_state;
+
modify_qp_exit2:
if (squeue_locked) { /* this means: sqe -> rts */
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
@@ -1763,7 +1787,7 @@ int ehca_query_qp(struct ib_qp *qp,
if (qp_init_attr)
*qp_init_attr = my_qp->init_attr;
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
query_qp_exit1:
@@ -1811,7 +1835,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
goto modify_srq_exit0;
}
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
@@ -1864,7 +1888,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
srq_attr->srq_limit = EHCA_BMASK_GET(
MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
query_srq_exit1:
@@ -1945,6 +1969,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
if (HAS_SQ(my_qp))
ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
return 0;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index a20bbf46618..bbe0436f4f7 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -81,7 +81,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
recv_wr->sg_list[cnt_ds].length;
}
- if (ehca_debug_level) {
+ if (ehca_debug_level >= 3) {
ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
ipz_rqueue);
ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
@@ -281,7 +281,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
return -EINVAL;
}
- if (ehca_debug_level) {
+ if (ehca_debug_level >= 3) {
ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
}
@@ -421,6 +421,11 @@ int ehca_post_send(struct ib_qp *qp,
int ret = 0;
unsigned long flags;
+ if (unlikely(my_qp->state != IB_QPS_RTS)) {
+ ehca_err(qp->device, "QP not in RTS state qpn=%x", qp->qp_num);
+ return -EINVAL;
+ }
+
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_s, flags);
@@ -454,13 +459,14 @@ int ehca_post_send(struct ib_qp *qp,
goto post_send_exit0;
}
wqe_cnt++;
- ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
- my_qp, qp->qp_num, wqe_cnt);
} /* eof for cur_send_wr */
post_send_exit0:
iosync(); /* serialize GAL register access */
hipz_update_sqa(my_qp, wqe_cnt);
+ if (unlikely(ret || ehca_debug_level >= 2))
+ ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
+ my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
return ret;
@@ -520,13 +526,14 @@ static int internal_post_recv(struct ehca_qp *my_qp,
goto post_recv_exit0;
}
wqe_cnt++;
- ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
- my_qp, my_qp->real_qp_num, wqe_cnt);
} /* eof for cur_recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
hipz_update_rqa(my_qp, wqe_cnt);
+ if (unlikely(ret || ehca_debug_level >= 2))
+ ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
+ my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
return ret;
}
@@ -570,16 +577,17 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
struct ehca_cqe *cqe;
struct ehca_qp *my_qp;
- int cqe_count = 0;
+ int cqe_count = 0, is_error;
poll_cq_one_read_cqe:
cqe = (struct ehca_cqe *)
ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
if (!cqe) {
ret = -EAGAIN;
- ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
- "cq_num=%x ret=%i", my_cq, my_cq->cq_number, ret);
- goto poll_cq_one_exit0;
+ if (ehca_debug_level >= 3)
+ ehca_dbg(cq->device, "Completion queue is empty "
+ "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
+ goto poll_cq_one_exit0;
}
/* prevents loads being reordered across this point */
@@ -609,7 +617,7 @@ poll_cq_one_read_cqe:
ehca_dbg(cq->device,
"Got CQE with purged bit qp_num=%x src_qp=%x",
cqe->local_qp_number, cqe->remote_qp_number);
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
cqe->local_qp_number,
cqe->remote_qp_number);
@@ -622,11 +630,13 @@ poll_cq_one_read_cqe:
}
}
- /* tracing cqe */
- if (unlikely(ehca_debug_level)) {
+ is_error = cqe->status & WC_STATUS_ERROR_BIT;
+
+ /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
+ if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
ehca_dbg(cq->device,
- "Received COMPLETION ehca_cq=%p cq_num=%x -----",
- my_cq, my_cq->cq_number);
+ "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
+ is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
my_cq, my_cq->cq_number);
ehca_dbg(cq->device,
@@ -649,8 +659,9 @@ poll_cq_one_read_cqe:
/* update also queue adder to throw away this entry!!! */
goto poll_cq_one_exit0;
}
+
/* eval ib_wc_status */
- if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
+ if (unlikely(is_error)) {
/* complete with errors */
map_ib_wc_status(cqe->status, &wc->status);
wc->vendor_err = wc->status;
@@ -671,14 +682,6 @@ poll_cq_one_read_cqe:
wc->imm_data = cpu_to_be32(cqe->immediate_data);
wc->sl = cqe->service_level;
- if (unlikely(wc->status != IB_WC_SUCCESS))
- ehca_dbg(cq->device,
- "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
- "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
- "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
- cqe->status, cqe->local_qp_number,
- cqe->remote_qp_number, cqe->work_request_id, cqe);
-
poll_cq_one_exit0:
if (cqe_count > 0)
hipz_update_feca(my_cq, cqe_count);
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 1b07f2beafa..e43ed8f8a0c 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -211,8 +211,7 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
break;
case 1: /* qp rqueue_addr */
- ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
- qp->ib_qp.qp_num);
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
&qp->mm_count_rqueue);
if (unlikely(ret)) {
@@ -224,8 +223,7 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
break;
case 2: /* qp squeue_addr */
- ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
- qp->ib_qp.qp_num);
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
&qp->mm_count_squeue);
if (unlikely(ret)) {
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 7029aa65375..5245e13c3a3 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -123,8 +123,9 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
int i, sleep_msecs;
unsigned long flags = 0;
- ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
- opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
+ opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
for (i = 0; i < 5; i++) {
/* serialize hCalls to work around firmware issue */
@@ -148,7 +149,8 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
opcode, ret, arg1, arg2, arg3,
arg4, arg5, arg6, arg7);
else
- ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
return ret;
}
@@ -172,8 +174,10 @@ static long ehca_plpar_hcall9(unsigned long opcode,
int i, sleep_msecs;
unsigned long flags = 0;
- ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
- arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9);
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
+ arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, arg9);
for (i = 0; i < 5; i++) {
/* serialize hCalls to work around firmware issue */
@@ -201,7 +205,7 @@ static long ehca_plpar_hcall9(unsigned long opcode,
ret, outs[0], outs[1], outs[2], outs[3],
outs[4], outs[5], outs[6], outs[7],
outs[8]);
- } else
+ } else if (unlikely(ehca_debug_level >= 2))
ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
ret, outs[0], outs[1], outs[2], outs[3],
outs[4], outs[5], outs[6], outs[7],
@@ -381,7 +385,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
r_cb, /* r6 */
0, 0, 0, 0);
- if (ehca_debug_level)
+ if (ehca_debug_level >= 2)
ehca_dmp(query_port_response_block, 64, "response_block");
return ret;
@@ -731,9 +735,6 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
u64 ret;
u64 outs[PLPAR_HCALL9_BUFSIZE];
- ehca_gen_dbg("kernel PAGE_SIZE=%x access_ctrl=%016x "
- "vaddr=%lx length=%lx",
- (u32)PAGE_SIZE, access_ctrl, vaddr, length);
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
5, /* r5 */
@@ -758,7 +759,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
{
u64 ret;
- if (unlikely(ehca_debug_level >= 2)) {
+ if (unlikely(ehca_debug_level >= 3)) {
if (count > 1) {
u64 *kpage;
int i;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index db4ba92f79f..9d343b7c2f3 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -195,7 +195,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto bail;
}
- umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
+ umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
if (IS_ERR(umem))
return (void *) umem;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 3557e7edc9b..4521319b140 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -137,7 +137,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
int err;
*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
- IB_ACCESS_LOCAL_WRITE);
+ IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -204,7 +204,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &to_mucontext(context)->uar;
} else {
- err = mlx4_ib_db_alloc(dev, &cq->db, 1);
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
goto err_cq;
@@ -221,7 +221,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
}
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
- cq->db.dma, &cq->mcq);
+ cq->db.dma, &cq->mcq, 0);
if (err)
goto err_dbmap;
@@ -246,11 +246,11 @@ err_mtt:
if (context)
ib_umem_release(cq->umem);
else
- mlx4_ib_free_cq_buf(dev, &cq->buf, entries);
+ mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
if (!context)
- mlx4_ib_db_free(dev, &cq->db);
+ mlx4_db_free(dev->dev, &cq->db);
err_cq:
kfree(cq);
@@ -434,8 +434,8 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq)
mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
ib_umem_release(mcq->umem);
} else {
- mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1);
- mlx4_ib_db_free(dev, &mcq->db);
+ mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
+ mlx4_db_free(dev->dev, &mcq->db);
}
kfree(mcq);
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 1c36087aef1..8aee4233b38 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -34,124 +34,6 @@
#include "mlx4_ib.h"
-struct mlx4_ib_db_pgdir {
- struct list_head list;
- DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE);
- DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2);
- unsigned long *bits[2];
- __be32 *db_page;
- dma_addr_t db_dma;
-};
-
-static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev)
-{
- struct mlx4_ib_db_pgdir *pgdir;
-
- pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
- if (!pgdir)
- return NULL;
-
- bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2);
- pgdir->bits[0] = pgdir->order0;
- pgdir->bits[1] = pgdir->order1;
- pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device,
- PAGE_SIZE, &pgdir->db_dma,
- GFP_KERNEL);
- if (!pgdir->db_page) {
- kfree(pgdir);
- return NULL;
- }
-
- return pgdir;
-}
-
-static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir,
- struct mlx4_ib_db *db, int order)
-{
- int o;
- int i;
-
- for (o = order; o <= 1; ++o) {
- i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o);
- if (i < MLX4_IB_DB_PER_PAGE >> o)
- goto found;
- }
-
- return -ENOMEM;
-
-found:
- clear_bit(i, pgdir->bits[o]);
-
- i <<= o;
-
- if (o > order)
- set_bit(i ^ 1, pgdir->bits[order]);
-
- db->u.pgdir = pgdir;
- db->index = i;
- db->db = pgdir->db_page + db->index;
- db->dma = pgdir->db_dma + db->index * 4;
- db->order = order;
-
- return 0;
-}
-
-int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order)
-{
- struct mlx4_ib_db_pgdir *pgdir;
- int ret = 0;
-
- mutex_lock(&dev->pgdir_mutex);
-
- list_for_each_entry(pgdir, &dev->pgdir_list, list)
- if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order))
- goto out;
-
- pgdir = mlx4_ib_alloc_db_pgdir(dev);
- if (!pgdir) {
- ret = -ENOMEM;
- goto out;
- }
-
- list_add(&pgdir->list, &dev->pgdir_list);
-
- /* This should never fail -- we just allocated an empty page: */
- WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order));
-
-out:
- mutex_unlock(&dev->pgdir_mutex);
-
- return ret;
-}
-
-void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db)
-{
- int o;
- int i;
-
- mutex_lock(&dev->pgdir_mutex);
-
- o = db->order;
- i = db->index;
-
- if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
- clear_bit(i ^ 1, db->u.pgdir->order0);
- ++o;
- }
-
- i >>= o;
- set_bit(i, db->u.pgdir->bits[o]);
-
- if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) {
- dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
- db->u.pgdir->db_page, db->u.pgdir->db_dma);
- list_del(&db->u.pgdir->list);
- kfree(db->u.pgdir);
- }
-
- mutex_unlock(&dev->pgdir_mutex);
-}
-
struct mlx4_ib_user_db_page {
struct list_head list;
struct ib_umem *umem;
@@ -160,7 +42,7 @@ struct mlx4_ib_user_db_page {
};
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
- struct mlx4_ib_db *db)
+ struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
struct ib_umem_chunk *chunk;
@@ -181,7 +63,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
- PAGE_SIZE, 0);
+ PAGE_SIZE, 0, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
@@ -202,7 +84,7 @@ out:
return err;
}
-void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db)
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db)
{
mutex_lock(&context->db_page_mutex);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4d9b5ac4220..4d61e32866c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -557,9 +557,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_uar;
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
- INIT_LIST_HEAD(&ibdev->pgdir_list);
- mutex_init(&ibdev->pgdir_mutex);
-
ibdev->dev = dev;
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 9e637323c15..5cf994794d2 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -43,24 +43,6 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
-enum {
- MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4
-};
-
-struct mlx4_ib_db_pgdir;
-struct mlx4_ib_user_db_page;
-
-struct mlx4_ib_db {
- __be32 *db;
- union {
- struct mlx4_ib_db_pgdir *pgdir;
- struct mlx4_ib_user_db_page *user_page;
- } u;
- dma_addr_t dma;
- int index;
- int order;
-};
-
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
@@ -88,7 +70,7 @@ struct mlx4_ib_cq {
struct mlx4_cq mcq;
struct mlx4_ib_cq_buf buf;
struct mlx4_ib_cq_resize *resize_buf;
- struct mlx4_ib_db db;
+ struct mlx4_db db;
spinlock_t lock;
struct mutex resize_mutex;
struct ib_umem *umem;
@@ -127,7 +109,7 @@ struct mlx4_ib_qp {
struct mlx4_qp mqp;
struct mlx4_buf buf;
- struct mlx4_ib_db db;
+ struct mlx4_db db;
struct mlx4_ib_wq rq;
u32 doorbell_qpn;
@@ -154,7 +136,7 @@ struct mlx4_ib_srq {
struct ib_srq ibsrq;
struct mlx4_srq msrq;
struct mlx4_buf buf;
- struct mlx4_ib_db db;
+ struct mlx4_db db;
u64 *wrid;
spinlock_t lock;
int head;
@@ -175,9 +157,6 @@ struct mlx4_ib_dev {
struct mlx4_dev *dev;
void __iomem *uar_map;
- struct list_head pgdir_list;
- struct mutex pgdir_mutex;
-
struct mlx4_uar priv_uar;
u32 priv_pdn;
MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
@@ -248,11 +227,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
-int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order);
-void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db);
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
- struct mlx4_ib_db *db);
-void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db);
+ struct mlx4_db *db);
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index fe2c2e94a5f..68e92485fc7 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -132,7 +132,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
+ mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ access_flags, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index b75efae7e44..8e02ecfec18 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -482,7 +482,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err;
qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0);
+ qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
@@ -514,7 +514,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err;
if (!init_attr->srq) {
- err = mlx4_ib_db_alloc(dev, &qp->db, 0);
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0);
if (err)
goto err;
@@ -580,7 +580,7 @@ err_buf:
err_db:
if (!pd->uobject && !init_attr->srq)
- mlx4_ib_db_free(dev, &qp->db);
+ mlx4_db_free(dev->dev, &qp->db);
err:
return err;
@@ -666,7 +666,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
kfree(qp->rq.wrid);
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
if (!qp->ibqp.srq)
- mlx4_ib_db_free(dev, &qp->db);
+ mlx4_db_free(dev->dev, &qp->db);
}
}
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index beaa3b06cf5..12d6bc6f800 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -109,7 +109,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
}
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- buf_size, 0);
+ buf_size, 0, 0);
if (IS_ERR(srq->umem)) {
err = PTR_ERR(srq->umem);
goto err_srq;
@@ -129,7 +129,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
- err = mlx4_ib_db_alloc(dev, &srq->db, 0);
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0);
if (err)
goto err_srq;
@@ -200,7 +200,7 @@ err_buf:
err_db:
if (!pd->uobject)
- mlx4_ib_db_free(dev, &srq->db);
+ mlx4_db_free(dev->dev, &srq->db);
err_srq:
kfree(srq);
@@ -267,7 +267,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq)
kfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
- mlx4_ib_db_free(dev, &msrq->db);
+ mlx4_db_free(dev->dev, &msrq->db);
}
kfree(msrq);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 3538da16e3f..820205dec56 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -818,15 +818,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
- u32 key;
-
if (!fmr->maps)
return;
- key = tavor_key_to_hw_index(fmr->ibmr.lkey);
- key &= dev->limits.num_mpts - 1;
- fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
-
fmr->maps = 0;
writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
@@ -834,16 +828,9 @@ void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
- u32 key;
-
if (!fmr->maps)
return;
- key = arbel_key_to_hw_index(fmr->ibmr.lkey);
- key &= dev->limits.num_mpts - 1;
- key = adjust_key(dev, key);
- fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
-
fmr->maps = 0;
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 696e1f30233..be34f99ca62 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -39,6 +39,8 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+
+#include <linux/sched.h>
#include <linux/mm.h>
#include "mthca_dev.h"
@@ -367,6 +369,8 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
return ERR_PTR(-EFAULT);
}
+ context->reg_mr_warned = 0;
+
return &context->ibucontext;
}
@@ -1006,17 +1010,31 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
struct mthca_mr *mr;
+ struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
int i, j, k;
int err = 0;
int write_mtt_size;
+ if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
+ if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
+ mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
+ current->comm);
+ mthca_warn(dev, " Update libmthca to fix this.\n");
+ }
+ ++to_mucontext(pd->uobject->context)->reg_mr_warned;
+ ucmd.mr_attrs = 0;
+ } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
+ ucmd.mr_attrs & MTHCA_MR_DMASYNC);
+
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 262616c8ebb..934bf954403 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -67,6 +67,7 @@ struct mthca_ucontext {
struct ib_ucontext ibucontext;
struct mthca_uar uar;
struct mthca_user_db_table *db_tab;
+ int reg_mr_warned;
};
struct mthca_mtt;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index 02cc0a766f3..e1262c942db 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -61,6 +61,16 @@ struct mthca_alloc_pd_resp {
__u32 reserved;
};
+struct mthca_reg_mr {
+/*
+ * Mark the memory region with a DMA attribute that causes
+ * in-flight DMA to be flushed when the region is written to:
+ */
+#define MTHCA_MR_DMASYNC 0x1
+ __u32 mr_attrs;
+ __u32 reserved;
+};
+
struct mthca_create_cq {
__u32 lkey;
__u32 pdn;
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
index 2aeb7ac972a..d449eb6ec78 100644
--- a/drivers/infiniband/hw/nes/Kconfig
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -2,6 +2,7 @@ config INFINIBAND_NES
tristate "NetEffect RNIC Driver"
depends on PCI && INET && INFINIBAND
select LIBCRC32C
+ select INET_LRO
---help---
This is a low-level driver for NetEffect RDMA enabled
Network Interface Cards (RNIC).
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index b046262ed63..9f7364a9096 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -91,6 +91,10 @@ unsigned int nes_debug_level = 0;
module_param_named(debug_level, nes_debug_level, uint, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug output level");
+unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
+module_param(nes_lro_max_aggr, int, NES_LRO_MAX_AGGR);
+MODULE_PARM_DESC(nes_mro_max_aggr, " nic LRO MAX packet aggregation");
+
LIST_HEAD(nes_adapter_list);
static LIST_HEAD(nes_dev_list);
@@ -139,8 +143,9 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
addr = ntohl(ifa->ifa_address);
mask = ntohl(ifa->ifa_mask);
- nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %08X, netmask %08X.\n",
- addr, mask);
+ nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address " NIPQUAD_FMT
+ ", netmask " NIPQUAD_FMT ".\n",
+ HIPQUAD(addr), HIPQUAD(mask));
list_for_each_entry(nesdev, &nes_dev_list, list) {
nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n",
nesdev, nesdev->netdev[0]->name);
@@ -353,13 +358,11 @@ struct ib_qp *nes_get_qp(struct ib_device *device, int qpn)
*/
static void nes_print_macaddr(struct net_device *netdev)
{
- nes_debug(NES_DBG_INIT, "%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, IRQ %u\n",
- netdev->name,
- netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
- netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
- netdev->irq);
-}
+ DECLARE_MAC_BUF(mac);
+ nes_debug(NES_DBG_INIT, "%s: %s, IRQ %u\n",
+ netdev->name, print_mac(mac, netdev->dev_addr), netdev->irq);
+}
/**
* nes_interrupt - handle interrupts
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index cdf2e9ad62f..1f9f7bf7386 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -173,6 +173,7 @@ extern int disable_mpa_crc;
extern unsigned int send_first;
extern unsigned int nes_drv_opt;
extern unsigned int nes_debug_level;
+extern unsigned int nes_lro_max_aggr;
extern struct list_head nes_adapter_list;
@@ -535,8 +536,8 @@ int nes_register_ofa_device(struct nes_ib_device *);
int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16);
void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
-void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16);
-void nes_read_10G_phy_reg(struct nes_device *, u16, u8);
+void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
+void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int);
int nes_arp_table(struct nes_device *, u32, u8 *, u32);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index d0738623bcf..9a4b40fae40 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -594,7 +594,7 @@ static void nes_cm_timer_tick(unsigned long pass)
continue;
}
/* this seems like the correct place, but leave send entry unprotected */
- // spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */
atomic_inc(&send_entry->skb->users);
cm_packets_retrans++;
nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p,"
@@ -852,8 +852,8 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
/* get a handle on the hte */
hte = &cm_core->connected_nodes;
- nes_debug(NES_DBG_CM, "Searching for an owner node:%x:%x from core %p->%p\n",
- loc_addr, loc_port, cm_core, hte);
+ nes_debug(NES_DBG_CM, "Searching for an owner node: " NIPQUAD_FMT ":%x from core %p->%p\n",
+ HIPQUAD(loc_addr), loc_port, cm_core, hte);
/* walk list and find cm_node associated with this session ID */
spin_lock_irqsave(&cm_core->ht_lock, flags);
@@ -902,8 +902,8 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
}
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- nes_debug(NES_DBG_CM, "Unable to find listener- %x:%x\n",
- dst_addr, dst_port);
+ nes_debug(NES_DBG_CM, "Unable to find listener for " NIPQUAD_FMT ":%x\n",
+ HIPQUAD(dst_addr), dst_port);
/* no listener */
return NULL;
@@ -1054,6 +1054,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
int arpindex = 0;
struct nes_device *nesdev;
struct nes_adapter *nesadapter;
+ DECLARE_MAC_BUF(mac);
/* create an hte and cm_node for this instance */
cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
@@ -1066,8 +1067,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->loc_port = cm_info->loc_port;
cm_node->rem_port = cm_info->rem_port;
cm_node->send_write0 = send_first;
- nes_debug(NES_DBG_CM, "Make node addresses : loc = %x:%x, rem = %x:%x\n",
- cm_node->loc_addr, cm_node->loc_port, cm_node->rem_addr, cm_node->rem_port);
+ nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n",
+ HIPQUAD(cm_node->loc_addr), cm_node->loc_port,
+ HIPQUAD(cm_node->rem_addr), cm_node->rem_port);
cm_node->listener = listener;
cm_node->netdev = nesvnic->netdev;
cm_node->cm_id = cm_info->cm_id;
@@ -1116,11 +1118,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
/* copy the mac addr to node context */
memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN);
- nes_debug(NES_DBG_CM, "Remote mac addr from arp table:%02x,"
- " %02x, %02x, %02x, %02x, %02x\n",
- cm_node->rem_mac[0], cm_node->rem_mac[1],
- cm_node->rem_mac[2], cm_node->rem_mac[3],
- cm_node->rem_mac[4], cm_node->rem_mac[5]);
+ nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %s\n",
+ print_mac(mac, cm_node->rem_mac));
add_hte_node(cm_core, cm_node);
atomic_inc(&cm_nodes_created);
@@ -1336,7 +1335,7 @@ static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
cm_node->loc_addr, cm_node->loc_port,
cm_node->rem_addr, cm_node->rem_port,
cm_node->state, atomic_read(&cm_node->ref_count));
- // create event
+ /* create event */
cm_node->state = NES_CM_STATE_CLOSED;
create_event(cm_node, NES_CM_EVENT_ABORTED);
@@ -1670,7 +1669,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
if (!cm_node)
return NULL;
- // set our node side to client (active) side
+ /* set our node side to client (active) side */
cm_node->tcp_cntxt.client = 1;
cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
@@ -1695,7 +1694,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
loopbackremotenode->mpa_frame_size = mpa_frame_size -
sizeof(struct ietf_mpa_frame);
- // we are done handling this state, set node to a TSA state
+ /* we are done handling this state, set node to a TSA state */
cm_node->state = NES_CM_STATE_TSA;
cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num;
loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num;
@@ -1850,8 +1849,10 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni
nfo.rem_addr = ntohl(iph->saddr);
nfo.rem_port = ntohs(tcph->source);
- nes_debug(NES_DBG_CM, "Received packet: dest=0x%08X:0x%04X src=0x%08X:0x%04X\n",
- iph->daddr, tcph->dest, iph->saddr, tcph->source);
+ nes_debug(NES_DBG_CM, "Received packet: dest=" NIPQUAD_FMT
+ ":0x%04X src=" NIPQUAD_FMT ":0x%04X\n",
+ NIPQUAD(iph->daddr), tcph->dest,
+ NIPQUAD(iph->saddr), tcph->source);
/* note: this call is going to increment cm_node ref count */
cm_node = find_node(cm_core,
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index aa53aab91bf..8dc70f9bad2 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -38,6 +38,7 @@
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/if_vlan.h>
+#include <linux/inet_lro.h>
#include "nes.h"
@@ -636,6 +637,15 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n");
return 0;
}
+
+ i = 0;
+ while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
+ mdelay(1);
+ if (i >= 10000) {
+ printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
+ nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
+ return 0;
+ }
}
/* port reset */
@@ -684,17 +694,6 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
}
}
-
-
- i = 0;
- while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
- mdelay(1);
- if (i >= 10000) {
- printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
- return 0;
- }
-
return port_count;
}
@@ -834,7 +833,7 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou
nes_write_indexed(nesdev, 0x00000900, 0x20000001);
nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);
nes_write_indexed(nesdev, 0x000060C8, 0x00000020);
- //
+
nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0);
/* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */
@@ -1209,11 +1208,16 @@ int nes_init_phy(struct nes_device *nesdev)
{
struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 counter = 0;
+ u32 sds_common_control0;
u32 mac_index = nesdev->mac_index;
- u32 tx_config;
+ u32 tx_config = 0;
u16 phy_data;
+ u32 temp_phy_data = 0;
+ u32 temp_phy_data2 = 0;
+ u32 i = 0;
- if (nesadapter->OneG_Mode) {
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
printk(PFX "%s: Programming mdc config for 1G\n", __func__);
@@ -1225,7 +1229,7 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n",
nesadapter->phy_index[mac_index], phy_data);
- nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
+ nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
/* Reset the PHY */
nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
@@ -1279,12 +1283,126 @@ int nes_init_phy(struct nes_device *nesdev)
nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);
} else {
- if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
+ if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) ||
+ (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
/* setup 10G MDIO operation */
tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
tx_config |= 0x14;
nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
}
+ if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ mdelay(10);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+ /*
+ * if firmware is already running (like from a
+ * driver un-load/load, don't do anything.
+ */
+ if (temp_phy_data == temp_phy_data2) {
+ /* configure QT2505 AMCC PHY */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528);
+
+ /*
+ * remove micro from reset; chip boots from ROM,
+ * uploads EEPROM f/w image, uC executes f/w
+ */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002);
+
+ /*
+ * wait for heart beat to start to
+ * know loading is done
+ */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n");
+ break;
+ }
+ mdelay(100);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ } while ((temp_phy_data2 == temp_phy_data));
+
+ /*
+ * wait for tracking to start to know
+ * f/w is good to go
+ */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n");
+ break;
+ }
+ mdelay(1000);
+ /*
+ * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n",
+ * temp_phy_data);
+ */
+ } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+
+ /* set LOS Control invert RXLOSB_I_PADINV */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000);
+ /* set LOS Control to mask of RXLOSB_I */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042);
+ /* set LED1 to input mode (LED1 and LED2 share same LED) */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007);
+ /* set LED2 to RX link_status and activity */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A);
+ /* set LED3 to RX link_status */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009);
+
+ /*
+ * reset the res-calibration on t2
+ * serdes; ensures it is stable after
+ * the amcc phy is stable
+ */
+
+ sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+ sds_common_control0 |= 0x1;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+ /* release the res-calibration reset */
+ sds_common_control0 &= 0xfffffffe;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+ i = 0;
+ while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
+ && (i++ < 5000)) {
+ /* mdelay(1); */
+ }
+
+ /*
+ * wait for link train done before moving on,
+ * or will get an interupt storm
+ */
+ counter = 0;
+ do {
+ temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)));
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didnt train!!!>\n");
+ break;
+ }
+ mdelay(1);
+ } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000));
+ }
+ }
}
return 0;
}
@@ -1377,6 +1495,25 @@ static void nes_rq_wqes_timeout(unsigned long parm)
}
+static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr,
+ void **tcph, u64 *hdr_flags, void *priv)
+{
+ unsigned int ip_len;
+ struct iphdr *iph;
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_TCP)
+ return -1;
+ ip_len = ip_hdrlen(skb);
+ skb_set_transport_header(skb, ip_len);
+ *tcph = tcp_hdr(skb);
+
+ *hdr_flags = LRO_IPV4 | LRO_TCP;
+ *iphdr = iph;
+ return 0;
+}
+
+
/**
* nes_init_nic_qp
*/
@@ -1522,10 +1659,10 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
}
u64temp = (u64)nesvnic->nic.sq_pbase;
- nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
u64temp = (u64)nesvnic->nic.rq_pbase;
- nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
@@ -1577,7 +1714,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
nic_rqe = &nesvnic->nic.rq_vbase[counter];
nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
nesvnic->nic.rx_skb[counter] = skb;
}
@@ -1594,15 +1731,21 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
-
if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
{
nes_nic_init_timer(nesdev);
if (netdev->mtu > 1500)
jumbomode = 1;
- nes_nic_init_timer_defaults(nesdev, jumbomode);
- }
-
+ nes_nic_init_timer_defaults(nesdev, jumbomode);
+ }
+ nesvnic->lro_mgr.max_aggr = NES_LRO_MAX_AGGR;
+ nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
+ nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
+ nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
+ nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
+ nesvnic->lro_mgr.dev = netdev;
+ nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
+ nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
return 0;
}
@@ -1622,8 +1765,8 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
/* Free remaining NIC receive buffers */
while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
- nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
- wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+ nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
+ wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
@@ -1706,17 +1849,17 @@ int nes_napi_isr(struct nes_device *nesdev)
/* iff NIC, process here, else wait for DPC */
if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) {
nesdev->napi_isr_ran = 0;
- nes_write32(nesdev->regs+NES_INT_STAT,
- (int_stat &
- ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+ nes_write32(nesdev->regs + NES_INT_STAT,
+ (int_stat &
+ ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
/* Process the CEQs */
nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]);
if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) &&
- (!nesadapter->et_use_adaptive_rx_coalesce)) ||
- ((nesadapter->et_use_adaptive_rx_coalesce) &&
- (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) {
+ (!nesadapter->et_use_adaptive_rx_coalesce)) ||
+ ((nesadapter->et_use_adaptive_rx_coalesce) &&
+ (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) {
if ((nesdev->int_req & NES_INT_TIMER) == 0) {
/* Enable Periodic timer interrupts */
nesdev->int_req |= NES_INT_TIMER;
@@ -1794,12 +1937,12 @@ void nes_dpc(unsigned long param)
}
if (int_stat) {
- if (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)) {
+ if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+ NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) {
/* Ack the interrupts */
nes_write32(nesdev->regs+NES_INT_STAT,
- (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+ (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+ NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
}
temp_int_stat = int_stat;
@@ -1864,8 +2007,8 @@ void nes_dpc(unsigned long param)
}
}
/* Don't use the interface interrupt bit stay in loop */
- int_stat &= ~NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3;
+ int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 |
+ NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3;
} while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS));
if (timer_ints == 1) {
@@ -1876,9 +2019,9 @@ void nes_dpc(unsigned long param)
nesdev->timer_only_int_count = 0;
nesdev->int_req &= ~NES_INT_TIMER;
nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+ nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req);
} else {
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
}
} else {
if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
@@ -1886,7 +2029,7 @@ void nes_dpc(unsigned long param)
nes_nic_init_timer(nesdev);
}
nesdev->timer_only_int_count = 0;
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
}
} else {
nesdev->timer_only_int_count = 0;
@@ -1935,7 +2078,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
do {
if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) &
NES_CEQE_VALID) {
- u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX])))<<32) |
+ u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) |
((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX])));
u64temp <<= 1;
cq = *((struct nes_hw_cq **)&u64temp);
@@ -1963,7 +2106,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
*/
static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
{
-// u64 u64temp;
+ /* u64 u64temp; */
u32 head;
u32 aeq_size;
u32 aeqe_misc;
@@ -1982,8 +2125,10 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) {
if (aeqe_cq_id >= NES_FIRST_QPN) {
/* dealing with an accelerated QP related AE */
-// u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])))<<32) |
-// ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+ /*
+ * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) |
+ * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+ */
nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe);
} else {
/* TODO: dealing with a CQP related AE */
@@ -2083,6 +2228,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
u32 u32temp;
u16 phy_data;
u16 temp_phy_data;
+ u32 pcs_val = 0x0f0f0000;
+ u32 pcs_mask = 0x0f1f0000;
spin_lock_irqsave(&nesadapter->phy_lock, flags);
if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
@@ -2146,13 +2293,30 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n",
nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0),
nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200));
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
+
+ if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ switch (mac_index) {
+ case 1:
+ case 3:
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+ break;
+ default:
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ break;
+ }
+ } else {
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+ }
+
nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n",
mac_index, pcs_control_status);
- if (nesadapter->OneG_Mode) {
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
u32temp = 0x01010000;
if (nesadapter->port_count > 2) {
u32temp |= 0x02020000;
@@ -2161,24 +2325,59 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
phy_data = 0;
nes_debug(NES_DBG_PHY, "PCS says the link is down\n");
}
- } else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
- nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
- temp_phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- u32temp = 20;
- do {
- nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
- phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- if ((phy_data == temp_phy_data) || (!(--u32temp)))
- break;
- temp_phy_data = phy_data;
- } while (1);
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
-
} else {
- phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0;
+ switch (nesadapter->phy_type[mac_index]) {
+ case NES_PHY_TYPE_IRIS:
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ u32temp = 20;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((phy_data == temp_phy_data) || (!(--u32temp)))
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+ nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+ __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+ break;
+
+ case NES_PHY_TYPE_ARGUS:
+ /* clear the alarms */
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
+ /* check link status */
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ u32temp = 100;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((phy_data == temp_phy_data) || (!(--u32temp)))
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+ nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+ __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
+ break;
+
+ case NES_PHY_TYPE_PUMA_1G:
+ if (mac_index < 2)
+ pcs_val = pcs_mask = 0x01010000;
+ else
+ pcs_val = pcs_mask = 0x02020000;
+ /* fall through */
+ default:
+ phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0;
+ break;
+ }
}
if (phy_data & 0x0004) {
@@ -2187,8 +2386,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n",
nesvnic->linkup);
if (nesvnic->linkup == 0) {
- printk(PFX "The Link is now up for port %u, netdev %p.\n",
- mac_index, nesvnic->netdev);
+ printk(PFX "The Link is now up for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
if (netif_queue_stopped(nesvnic->netdev))
netif_start_queue(nesvnic->netdev);
nesvnic->linkup = 1;
@@ -2201,8 +2400,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
nesvnic->linkup);
if (nesvnic->linkup == 1) {
- printk(PFX "The Link is now down for port %u, netdev %p.\n",
- mac_index, nesvnic->netdev);
+ printk(PFX "The Link is now down for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
if (!(netif_queue_stopped(nesvnic->netdev)))
netif_stop_queue(nesvnic->netdev);
nesvnic->linkup = 0;
@@ -2256,10 +2455,13 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
u16 pkt_type;
u16 rqes_processed = 0;
u8 sq_cqes = 0;
+ u8 nes_use_lro = 0;
head = cq->cq_head;
cq_size = cq->cq_size;
cq->cqes_pending = 1;
+ if (nesvnic->netdev->features & NETIF_F_LRO)
+ nes_use_lro = 1;
do {
if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
NES_NIC_CQE_VALID) {
@@ -2274,8 +2476,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
/* bump past the vlan tag */
wqe_fragment_length++;
if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
- u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+ u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+ wqe_fragment_index * 2]);
+ u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX +
+ wqe_fragment_index * 2])) << 32;
bus_address = (dma_addr_t)u64temp;
if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) {
pci_unmap_single(nesdev->pcidev,
@@ -2285,8 +2489,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
}
for (; wqe_fragment_index < 5; wqe_fragment_index++) {
if (wqe_fragment_length[wqe_fragment_index]) {
- u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+ u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+ wqe_fragment_index * 2]);
+ u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
+ + wqe_fragment_index * 2])) <<32;
bus_address = (dma_addr_t)u64temp;
pci_unmap_page(nesdev->pcidev,
bus_address,
@@ -2333,7 +2539,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) {
nes_write32(nesdev->regs+NES_CQE_ALLOC,
cq->cq_number | (cqe_count << 16));
-// nesadapter->tune_timer.cq_count += cqe_count;
+ /* nesadapter->tune_timer.cq_count += cqe_count; */
nesdev->currcq_count += cqe_count;
cqe_count = 0;
nes_replenish_nic_rq(nesvnic);
@@ -2381,9 +2587,16 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
>> 16);
nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
nesvnic->netdev->name, vlan_tag);
- nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
+ if (nes_use_lro)
+ lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
+ nesvnic->vlan_grp, vlan_tag, NULL);
+ else
+ nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
} else {
- nes_netif_rx(rx_skb);
+ if (nes_use_lro)
+ lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+ else
+ nes_netif_rx(rx_skb);
}
}
@@ -2401,7 +2614,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
/* Replenish Nic CQ */
nes_write32(nesdev->regs+NES_CQE_ALLOC,
cq->cq_number | (cqe_count << 16));
-// nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+ /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
nesdev->currcq_count += cqe_count;
cqe_count = 0;
}
@@ -2415,26 +2628,27 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
} while (1);
+ if (nes_use_lro)
+ lro_flush_all(&nesvnic->lro_mgr);
if (sq_cqes) {
barrier();
/* restart the queue if it had been stopped */
if (netif_queue_stopped(nesvnic->netdev))
netif_wake_queue(nesvnic->netdev);
}
-
cq->cq_head = head;
/* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n",
cq->cq_number, cqe_count, cq->cq_head); */
cq->cqe_allocs_pending = cqe_count;
if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
{
-// nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+ /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
nesdev->currcq_count += cqe_count;
nes_nic_tune_timer(nesdev);
}
if (atomic_read(&nesvnic->rx_skbs_needed))
nes_replenish_nic_rq(nesvnic);
- }
+}
/**
@@ -2463,7 +2677,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+ cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
((u64)(le32_to_cpu(cq->cq_vbase[head].
cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
cqp = *((struct nes_hw_cqp **)&u64temp);
@@ -2480,7 +2694,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
}
u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
- wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) |
+ wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
cqp_request = *((struct nes_cqp_request **)&u64temp);
@@ -2517,7 +2731,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
} else {
nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
+ le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
if (cqp_request->dynamic) {
kfree(cqp_request);
} else {
@@ -2531,7 +2745,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
}
cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (1 << 16));
+ nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16));
if (++cqp->sq_tail >= cqp->sq_size)
cqp->sq_tail = 0;
@@ -2600,13 +2814,13 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nes_debug(NES_DBG_AEQ, "\n");
aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
- context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
+ context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
} else {
aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+ aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
BUG_ON(!context);
}
@@ -2619,7 +2833,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
-
switch (async_event_id) {
case NES_AEQE_AEID_LLP_FIN_RECEIVED:
nesqp = *((struct nes_qp **)&context);
@@ -3023,7 +3236,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID);
cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32(
(((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) |
- (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
+ (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32(
(((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]);
} else {
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index b7e2844f096..745bf94f3f0 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -33,8 +33,12 @@
#ifndef __NES_HW_H
#define __NES_HW_H
-#define NES_PHY_TYPE_1G 2
-#define NES_PHY_TYPE_IRIS 3
+#include <linux/inet_lro.h>
+
+#define NES_PHY_TYPE_1G 2
+#define NES_PHY_TYPE_IRIS 3
+#define NES_PHY_TYPE_ARGUS 4
+#define NES_PHY_TYPE_PUMA_1G 5
#define NES_PHY_TYPE_PUMA_10G 6
#define NES_MULTICAST_PF_MAX 8
@@ -905,7 +909,7 @@ struct nes_hw_qp {
};
struct nes_hw_cq {
- struct nes_hw_cqe volatile *cq_vbase; /* PCI memory for host rings */
+ struct nes_hw_cqe *cq_vbase; /* PCI memory for host rings */
void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq);
dma_addr_t cq_pbase; /* PCI memory for host rings */
u16 cq_head;
@@ -965,7 +969,7 @@ struct nes_arp_entry {
#define NES_NIC_CQ_DOWNWARD_TREND 16
struct nes_hw_tune_timer {
- //u16 cq_count;
+ /* u16 cq_count; */
u16 threshold_low;
u16 threshold_target;
u16 threshold_high;
@@ -982,8 +986,10 @@ struct nes_hw_tune_timer {
#define NES_TIMER_INT_LIMIT 2
#define NES_TIMER_INT_LIMIT_DYNAMIC 10
#define NES_TIMER_ENABLE_LIMIT 4
-#define NES_MAX_LINK_INTERRUPTS 128
-#define NES_MAX_LINK_CHECK 200
+#define NES_MAX_LINK_INTERRUPTS 128
+#define NES_MAX_LINK_CHECK 200
+#define NES_MAX_LRO_DESCRIPTORS 32
+#define NES_LRO_MAX_AGGR 64
struct nes_adapter {
u64 fw_ver;
@@ -1183,6 +1189,9 @@ struct nes_vnic {
u8 of_device_registered;
u8 rdma_enabled;
u8 rx_checksum_disabled;
+ u32 lro_max_aggr;
+ struct net_lro_mgr lro_mgr;
+ struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
};
struct nes_ib_device {
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 01cd0effc49..1b0938c8777 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -185,12 +185,13 @@ static int nes_netdev_open(struct net_device *netdev)
nic_active |= nic_active_bit;
nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+ macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
macaddr_high += (u16)netdev->dev_addr[1];
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
+
+ macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+ macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+ macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+ macaddr_low += (u32)netdev->dev_addr[5];
/* Program the various MAC regs */
for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
@@ -451,7 +452,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
__le16 *wqe_fragment_length;
u32 nr_frags;
u32 original_first_length;
-// u64 *wqe_fragment_address;
+ /* u64 *wqe_fragment_address; */
/* first fragment (0) is used by copy buffer */
u16 wqe_fragment_index=1;
u16 hoffset;
@@ -461,11 +462,12 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
u32 old_head;
u32 wqe_misc;
- /* nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
- " (%u frags), tso_size=%u\n",
- netdev->name, skb->len, skb_headlen(skb),
- skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
- */
+ /*
+ * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
+ * " (%u frags), tso_size=%u\n",
+ * netdev->name, skb->len, skb_headlen(skb),
+ * skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
+ */
if (!netif_carrier_ok(netdev))
return NETDEV_TX_OK;
@@ -787,22 +789,20 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
int i;
u32 macaddr_low;
u16 macaddr_high;
+ DECLARE_MAC_BUF(mac);
if (!is_valid_ether_addr(mac_addr->sa_data))
return -EADDRNOTAVAIL;
memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
- printk(PFX "%s: Address length = %d, Address = %02X%02X%02X%02X%02X%02X..\n",
- __func__, netdev->addr_len,
- mac_addr->sa_data[0], mac_addr->sa_data[1],
- mac_addr->sa_data[2], mac_addr->sa_data[3],
- mac_addr->sa_data[4], mac_addr->sa_data[5]);
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+ printk(PFX "%s: Address length = %d, Address = %s\n",
+ __func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data));
+ macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
macaddr_high += (u16)netdev->dev_addr[1];
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
+ macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+ macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+ macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+ macaddr_low += (u32)netdev->dev_addr[5];
for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
if (nesvnic->qp_nic_index[i] == 0xf) {
@@ -878,17 +878,17 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
if (mc_nic_index < 0)
mc_nic_index = nesvnic->nic_index;
if (multicast_addr) {
- nes_debug(NES_DBG_NIC_RX, "Assigning MC Address = %02X%02X%02X%02X%02X%02X to register 0x%04X nic_idx=%d\n",
- multicast_addr->dmi_addr[0], multicast_addr->dmi_addr[1],
- multicast_addr->dmi_addr[2], multicast_addr->dmi_addr[3],
- multicast_addr->dmi_addr[4], multicast_addr->dmi_addr[5],
- perfect_filter_register_address+(mc_index * 8), mc_nic_index);
- macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
+ DECLARE_MAC_BUF(mac);
+ nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n",
+ print_mac(mac, multicast_addr->dmi_addr),
+ perfect_filter_register_address+(mc_index * 8),
+ mc_nic_index);
+ macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
macaddr_high += (u16)multicast_addr->dmi_addr[1];
- macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
- macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
- macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
- macaddr_low += (u32)multicast_addr->dmi_addr[5];
+ macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
+ macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
+ macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
+ macaddr_low += (u32)multicast_addr->dmi_addr[5];
nes_write_indexed(nesdev,
perfect_filter_register_address+(mc_index * 8),
macaddr_low);
@@ -912,23 +912,23 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
/**
* nes_netdev_change_mtu
*/
-static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- int ret = 0;
- u8 jumbomode=0;
+ struct nes_device *nesdev = nesvnic->nesdev;
+ int ret = 0;
+ u8 jumbomode = 0;
- if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
+ if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
return -EINVAL;
- netdev->mtu = new_mtu;
+ netdev->mtu = new_mtu;
nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN;
if (netdev->mtu > 1500) {
jumbomode=1;
}
- nes_nic_init_timer_defaults(nesdev, jumbomode);
+ nes_nic_init_timer_defaults(nesdev, jumbomode);
if (netif_running(netdev)) {
nes_netdev_stop(netdev);
@@ -938,8 +938,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
return ret;
}
-#define NES_ETHTOOL_STAT_COUNT 55
-static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = {
+static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
"Link Change Interrupts",
"Linearized SKBs",
"T/GSO Requests",
@@ -995,8 +994,12 @@ static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN]
"CQ Depth 32",
"CQ Depth 128",
"CQ Depth 256",
+ "LRO aggregated",
+ "LRO flushed",
+ "LRO no_desc",
};
+#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset)
/**
* nes_netdev_get_rx_csum
@@ -1191,6 +1194,9 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
target_stat_values[52] = int_mod_cq_depth_32;
target_stat_values[53] = int_mod_cq_depth_128;
target_stat_values[54] = int_mod_cq_depth_256;
+ target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated;
+ target_stat_values[56] = nesvnic->lro_mgr.stats.flushed;
+ target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc;
}
@@ -1221,14 +1227,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *et_coalesce)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
unsigned long flags;
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+ spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
if (et_coalesce->rx_max_coalesced_frames_low) {
- shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
+ shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
}
if (et_coalesce->rx_max_coalesced_frames_irq) {
shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq;
@@ -1248,14 +1254,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev,
nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq;
if (et_coalesce->use_adaptive_rx_coalesce) {
nesadapter->et_use_adaptive_rx_coalesce = 1;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
nesadapter->et_rx_coalesce_usecs_irq = 0;
if (et_coalesce->pkt_rate_low) {
- nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
+ nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
}
} else {
nesadapter->et_use_adaptive_rx_coalesce = 0;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
if (nesadapter->et_rx_coalesce_usecs_irq) {
nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8)));
@@ -1272,28 +1278,28 @@ static int nes_netdev_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *et_coalesce)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct ethtool_coalesce temp_et_coalesce;
struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
unsigned long flags;
memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce));
- temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
- temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
- temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval;
+ temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
+ temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
+ temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval;
temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low;
spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
- temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low;
- temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target;
+ temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low;
+ temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target;
temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high;
- temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
+ temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max;
if (nesadapter->et_use_adaptive_rx_coalesce) {
temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use;
}
spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
- memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
+ memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
return 0;
}
@@ -1372,30 +1378,38 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
u16 phy_data;
et_cmd->duplex = DUPLEX_FULL;
- et_cmd->port = PORT_MII;
+ et_cmd->port = PORT_MII;
+
if (nesadapter->OneG_Mode) {
- et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg;
- et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg;
et_cmd->speed = SPEED_1000;
- nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
- &phy_data);
- if (phy_data&0x1000) {
- et_cmd->autoneg = AUTONEG_ENABLE;
+ if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ et_cmd->supported = SUPPORTED_1000baseT_Full;
+ et_cmd->advertising = ADVERTISED_1000baseT_Full;
+ et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->transceiver = XCVR_INTERNAL;
+ et_cmd->phy_address = nesdev->mac_index;
} else {
- et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg;
+ et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg;
+ nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data);
+ if (phy_data & 0x1000)
+ et_cmd->autoneg = AUTONEG_ENABLE;
+ else
+ et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->transceiver = XCVR_EXTERNAL;
+ et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
}
- et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
} else {
- if (nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) {
+ if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) ||
+ (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) {
et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->port = PORT_FIBRE;
- et_cmd->supported = SUPPORTED_FIBRE;
+ et_cmd->port = PORT_FIBRE;
+ et_cmd->supported = SUPPORTED_FIBRE;
et_cmd->advertising = ADVERTISED_FIBRE;
et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
} else {
et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->supported = SUPPORTED_10000baseT_Full;
+ et_cmd->supported = SUPPORTED_10000baseT_Full;
et_cmd->advertising = ADVERTISED_10000baseT_Full;
et_cmd->phy_address = nesdev->mac_index;
}
@@ -1418,14 +1432,15 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd
struct nes_adapter *nesadapter = nesdev->nesadapter;
u16 phy_data;
- if (nesadapter->OneG_Mode) {
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) {
nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
&phy_data);
if (et_cmd->autoneg) {
/* Turn on Full duplex, Autoneg, and restart autonegotiation */
phy_data |= 0x1300;
} else {
- // Turn off autoneg
+ /* Turn off autoneg */
phy_data &= ~0x1000;
}
nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
@@ -1456,6 +1471,8 @@ static struct ethtool_ops nes_ethtool_ops = {
.set_sg = ethtool_op_set_sg,
.get_tso = ethtool_op_get_tso,
.set_tso = ethtool_op_set_tso,
+ .get_flags = ethtool_op_get_flags,
+ .set_flags = ethtool_op_set_flags,
};
@@ -1609,27 +1626,34 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]);
if ((nesdev->netdev_count == 0) &&
- (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) {
- nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
- NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1)));
+ ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
+ ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) &&
+ (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
+ ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
+ /*
+ * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
+ * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
+ */
u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200*(nesvnic->logical_port&1)));
- u32temp |= 0x00200000;
- nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200*(nesvnic->logical_port&1)), u32temp);
+ (0x200 * (nesdev->mac_index & 1)));
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) {
+ u32temp |= 0x00200000;
+ nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)), u32temp);
+ }
+
u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200*(nesvnic->logical_port&1)) );
+ (0x200 * (nesdev->mac_index & 1)));
+
if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
- if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) {
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
nes_init_phy(nesdev);
- nes_read_10G_phy_reg(nesdev, 1,
- nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+ nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
temp_phy_data = (u16)nes_read_indexed(nesdev,
NES_IDX_MAC_MDIO_CONTROL);
u32temp = 20;
do {
- nes_read_10G_phy_reg(nesdev, 1,
- nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+ nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
phy_data = (u16)nes_read_indexed(nesdev,
NES_IDX_MAC_MDIO_CONTROL);
if ((phy_data == temp_phy_data) || (!(--u32temp)))
@@ -1646,6 +1670,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
nesvnic->linkup = 1;
}
+ } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
+ nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
+ if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
+ ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) {
+ nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+ nesvnic->linkup = 1;
+ }
}
/* clear the MAC interrupt status, assumes direct logical to physical mapping */
u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index f9db07c2717..fe83d1b2b17 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -444,15 +444,13 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16
/**
* nes_write_10G_phy_reg
*/
-void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg,
- u8 phy_addr, u16 data)
+void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg,
+ u16 data)
{
- u32 dev_addr;
u32 port_addr;
u32 u32temp;
u32 counter;
- dev_addr = 1;
port_addr = phy_addr;
/* set address */
@@ -492,14 +490,12 @@ void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg,
* This routine only issues the read, the data must be read
* separately.
*/
-void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr)
+void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg)
{
- u32 dev_addr;
u32 port_addr;
u32 u32temp;
u32 counter;
- dev_addr = 1;
port_addr = phy_addr;
/* set address */
@@ -660,7 +656,9 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
/* DELETE or RESOLVE */
if (arp_index == nesadapter->arp_table_size) {
- nes_debug(NES_DBG_NETDEV, "mac address not in ARP table - cannot delete or resolve\n");
+ nes_debug(NES_DBG_NETDEV, "MAC for " NIPQUAD_FMT " not in ARP table - cannot %s\n",
+ HIPQUAD(ip_addr),
+ action == NES_ARP_RESOLVE ? "resolve" : "delete");
return -1;
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index f9a5d439089..99b3c4ae86e 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1266,7 +1266,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
sq_size = init_attr->cap.max_send_wr;
rq_size = init_attr->cap.max_recv_wr;
- // check if the encoded sizes are OK or not...
+ /* check if the encoded sizes are OK or not... */
sq_encoded_size = nes_get_encoded_size(&sq_size);
rq_encoded_size = nes_get_encoded_size(&rq_size);
@@ -1976,7 +1976,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
if (nescq->cq_mem_size)
pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
- (void *)nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase);
+ nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase);
kfree(nescq);
return ret;
@@ -2377,7 +2377,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u8 single_page = 1;
u8 stag_key;
- region = ib_umem_get(pd->uobject->context, start, length, acc);
+ region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region)) {
return (struct ib_mr *)region;
}
@@ -3610,6 +3610,12 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
while (cqe_count < num_entries) {
if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
NES_CQE_VALID) {
+ /*
+ * Make sure we read CQ entry contents *after*
+ * we've checked the valid bit.
+ */
+ rmb();
+
cqe = nescq->hw_cq.cq_vbase[head];
nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 73b2b176ad0..ca126fc2b85 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -56,11 +56,11 @@
/* constants */
enum {
- IPOIB_PACKET_SIZE = 2048,
- IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
-
IPOIB_ENCAP_LEN = 4,
+ IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
+ IPOIB_UD_RX_SG = 2, /* max buffer needed for 4K mtu */
+
IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
@@ -95,6 +95,8 @@ enum {
IPOIB_MCAST_FLAG_SENDONLY = 1,
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3,
+
+ MAX_SEND_CQE = 16,
};
#define IPOIB_OP_RECV (1ul << 31)
@@ -139,7 +141,7 @@ struct ipoib_mcast {
struct ipoib_rx_buf {
struct sk_buff *skb;
- u64 mapping;
+ u64 mapping[IPOIB_UD_RX_SG];
};
struct ipoib_tx_buf {
@@ -285,7 +287,8 @@ struct ipoib_dev_priv {
u16 pkey_index;
struct ib_pd *pd;
struct ib_mr *mr;
- struct ib_cq *cq;
+ struct ib_cq *recv_cq;
+ struct ib_cq *send_cq;
struct ib_qp *qp;
u32 qkey;
@@ -294,6 +297,7 @@ struct ipoib_dev_priv {
unsigned int admin_mtu;
unsigned int mcast_mtu;
+ unsigned int max_ib_mtu;
struct ipoib_rx_buf *rx_ring;
@@ -304,6 +308,10 @@ struct ipoib_dev_priv {
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_send_wr tx_wr;
unsigned tx_outstanding;
+ struct ib_wc send_wc[MAX_SEND_CQE];
+
+ struct ib_recv_wr rx_wr;
+ struct ib_sge rx_sge[IPOIB_UD_RX_SG];
struct ib_wc ibwc[IPOIB_NUM_WC];
@@ -326,6 +334,7 @@ struct ipoib_dev_priv {
#endif
int hca_caps;
struct ipoib_ethtool_st ethtool;
+ struct timer_list poll_timer;
};
struct ipoib_ah {
@@ -366,6 +375,14 @@ struct ipoib_neigh {
struct list_head list;
};
+#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
+#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES)
+
+static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
+{
+ return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
+}
+
/*
* We stash a pointer to our private neighbour information after our
* hardware address in neigh->ha. The ALIGN() expression here makes
@@ -388,6 +405,7 @@ extern struct workqueue_struct *ipoib_workqueue;
int ipoib_poll(struct napi_struct *napi, int budget);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
+void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ib_pd *pd, struct ib_ah_attr *attr);
@@ -650,7 +668,6 @@ static inline int ipoib_register_debugfs(void) { return 0; }
static inline void ipoib_unregister_debugfs(void) { }
#endif
-
#define ipoib_printk(level, priv, format, arg...) \
printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
#define ipoib_warn(priv, format, arg...) \
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 9db7b0bd913..97e67d36378 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
- .send_cq = priv->cq, /* For drain WR */
- .recv_cq = priv->cq,
+ .send_cq = priv->recv_cq, /* For drain WR */
+ .recv_cq = priv->recv_cq,
.srq = priv->cm.srq,
.cap.max_send_wr = 1, /* For drain WR */
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
@@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
- .send_cq = priv->cq,
- .recv_cq = priv->cq,
+ .send_cq = priv->recv_cq,
+ .recv_cq = priv->recv_cq,
.srq = priv->cm.srq,
.cap.max_send_wr = ipoib_sendq_size,
.cap.max_send_sge = 1,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 9a47428366c..10279b79c44 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
coal->rx_max_coalesced_frames > 0xffff)
return -EINVAL;
- ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
+ ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
coal->rx_coalesce_usecs);
if (ret && ret != -ENOSYS) {
ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 0205eb7c1bd..f429bce24c2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -89,28 +89,59 @@ void ipoib_free_ah(struct kref *kref)
spin_unlock_irqrestore(&priv->lock, flags);
}
+static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
+ u64 mapping[IPOIB_UD_RX_SG])
+{
+ if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+ ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE,
+ DMA_FROM_DEVICE);
+ ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ } else
+ ib_dma_unmap_single(priv->ca, mapping[0],
+ IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
+ DMA_FROM_DEVICE);
+}
+
+static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
+ struct sk_buff *skb,
+ unsigned int length)
+{
+ if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int size;
+ /*
+ * There is only two buffers needed for max_payload = 4K,
+ * first buf size is IPOIB_UD_HEAD_SIZE
+ */
+ skb->tail += IPOIB_UD_HEAD_SIZE;
+ skb->len += length;
+
+ size = length - IPOIB_UD_HEAD_SIZE;
+
+ frag->size = size;
+ skb->data_len += size;
+ skb->truesize += size;
+ } else
+ skb_put(skb, length);
+
+}
+
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_sge list;
- struct ib_recv_wr param;
struct ib_recv_wr *bad_wr;
int ret;
- list.addr = priv->rx_ring[id].mapping;
- list.length = IPOIB_BUF_SIZE;
- list.lkey = priv->mr->lkey;
+ priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
+ priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
+ priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
- param.next = NULL;
- param.wr_id = id | IPOIB_OP_RECV;
- param.sg_list = &list;
- param.num_sge = 1;
- ret = ib_post_recv(priv->qp, &param, &bad_wr);
+ ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
if (unlikely(ret)) {
ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
- ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
- IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
dev_kfree_skb_any(priv->rx_ring[id].skb);
priv->rx_ring[id].skb = NULL;
}
@@ -118,15 +149,21 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
return ret;
}
-static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
+static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
- u64 addr;
+ int buf_size;
+ u64 *mapping;
- skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
- if (!skb)
- return -ENOMEM;
+ if (ipoib_ud_need_sg(priv->max_ib_mtu))
+ buf_size = IPOIB_UD_HEAD_SIZE;
+ else
+ buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+
+ skb = dev_alloc_skb(buf_size + 4);
+ if (unlikely(!skb))
+ return NULL;
/*
* IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
@@ -135,17 +172,32 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
*/
skb_reserve(skb, 4);
- addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE,
- DMA_FROM_DEVICE);
- if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
- dev_kfree_skb_any(skb);
- return -EIO;
+ mapping = priv->rx_ring[id].mapping;
+ mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
+ DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
+ goto error;
+
+ if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+ struct page *page = alloc_page(GFP_ATOMIC);
+ if (!page)
+ goto partial_error;
+ skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
+ mapping[1] =
+ ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page,
+ 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
+ goto partial_error;
}
- priv->rx_ring[id].skb = skb;
- priv->rx_ring[id].mapping = addr;
+ priv->rx_ring[id].skb = skb;
+ return skb;
- return 0;
+partial_error:
+ ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
+error:
+ dev_kfree_skb_any(skb);
+ return NULL;
}
static int ipoib_ib_post_receives(struct net_device *dev)
@@ -154,7 +206,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
int i;
for (i = 0; i < ipoib_recvq_size; ++i) {
- if (ipoib_alloc_rx_skb(dev, i)) {
+ if (!ipoib_alloc_rx_skb(dev, i)) {
ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
return -ENOMEM;
}
@@ -172,7 +224,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
struct sk_buff *skb;
- u64 addr;
+ u64 mapping[IPOIB_UD_RX_SG];
ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -184,15 +236,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
skb = priv->rx_ring[wr_id].skb;
- addr = priv->rx_ring[wr_id].mapping;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
if (wc->status != IB_WC_WR_FLUSH_ERR)
ipoib_warn(priv, "failed recv event "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
- ib_dma_unmap_single(priv->ca, addr,
- IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
dev_kfree_skb_any(skb);
priv->rx_ring[wr_id].skb = NULL;
return;
@@ -205,11 +255,14 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
goto repost;
+ memcpy(mapping, priv->rx_ring[wr_id].mapping,
+ IPOIB_UD_RX_SG * sizeof *mapping);
+
/*
* If we can't allocate a new RX buffer, dump
* this packet and reuse the old buffer.
*/
- if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+ if (unlikely(!ipoib_alloc_rx_skb(dev, wr_id))) {
++dev->stats.rx_dropped;
goto repost;
}
@@ -217,9 +270,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
- ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ ipoib_ud_dma_unmap_rx(priv, mapping);
+ ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
- skb_put(skb, wc->byte_len);
skb_pull(skb, IB_GRH_BYTES);
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
@@ -311,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req;
- unsigned long flags;
ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -331,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
dev_kfree_skb_any(tx_req->skb);
- spin_lock_irqsave(&priv->tx_lock, flags);
++priv->tx_tail;
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR)
@@ -346,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
wc->status, wr_id, wc->vendor_err);
}
+static int poll_tx(struct ipoib_dev_priv *priv)
+{
+ int n, i;
+
+ n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
+ for (i = 0; i < n; ++i)
+ ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
+
+ return n == MAX_SEND_CQE;
+}
+
int ipoib_poll(struct napi_struct *napi, int budget)
{
struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
@@ -361,7 +422,7 @@ poll_more:
int max = (budget - done);
t = min(IPOIB_NUM_WC, max);
- n = ib_poll_cq(priv->cq, t, priv->ibwc);
+ n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
for (i = 0; i < n; i++) {
struct ib_wc *wc = priv->ibwc + i;
@@ -372,12 +433,8 @@ poll_more:
ipoib_cm_handle_rx_wc(dev, wc);
else
ipoib_ib_handle_rx_wc(dev, wc);
- } else {
- if (wc->wr_id & IPOIB_OP_CM)
- ipoib_cm_handle_tx_wc(dev, wc);
- else
- ipoib_ib_handle_tx_wc(dev, wc);
- }
+ } else
+ ipoib_cm_handle_tx_wc(priv->dev, wc);
}
if (n != t)
@@ -386,7 +443,7 @@ poll_more:
if (done < budget) {
netif_rx_complete(dev, napi);
- if (unlikely(ib_req_notify_cq(priv->cq,
+ if (unlikely(ib_req_notify_cq(priv->recv_cq,
IB_CQ_NEXT_COMP |
IB_CQ_REPORT_MISSED_EVENTS)) &&
netif_rx_reschedule(dev, napi))
@@ -404,6 +461,26 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
netif_rx_schedule(dev, &priv->napi);
}
+static void drain_tx_cq(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ while (poll_tx(priv))
+ ; /* nothing */
+
+ if (netif_queue_stopped(dev))
+ mod_timer(&priv->poll_timer, jiffies + 1);
+
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
+{
+ drain_tx_cq((struct net_device *)dev_ptr);
+}
+
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
struct ib_ah *address, u32 qpn,
@@ -498,23 +575,34 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
else
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+ if (++priv->tx_outstanding == ipoib_sendq_size) {
+ ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
+ if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+ ipoib_warn(priv, "request notify on send CQ failed\n");
+ netif_stop_queue(dev);
+ }
+
if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
address->ah, qpn, tx_req, phead, hlen))) {
ipoib_warn(priv, "post_send failed\n");
++dev->stats.tx_errors;
+ --priv->tx_outstanding;
ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(skb);
+ if (netif_queue_stopped(dev))
+ netif_wake_queue(dev);
} else {
dev->trans_start = jiffies;
address->last_send = priv->tx_head;
++priv->tx_head;
+ skb_orphan(skb);
- if (++priv->tx_outstanding == ipoib_sendq_size) {
- ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
- netif_stop_queue(dev);
- }
}
+
+ if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+ while (poll_tx(priv))
+ ; /* nothing */
}
static void __ipoib_reap_ah(struct net_device *dev)
@@ -548,6 +636,11 @@ void ipoib_reap_ah(struct work_struct *work)
round_jiffies_relative(HZ));
}
+static void ipoib_ib_tx_timer_func(unsigned long ctx)
+{
+ drain_tx_cq((struct net_device *)ctx);
+}
+
int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -584,6 +677,10 @@ int ipoib_ib_dev_open(struct net_device *dev)
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
round_jiffies_relative(HZ));
+ init_timer(&priv->poll_timer);
+ priv->poll_timer.function = ipoib_ib_tx_timer_func;
+ priv->poll_timer.data = (unsigned long)dev;
+
set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
return 0;
@@ -661,7 +758,7 @@ void ipoib_drain_cq(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
int i, n;
do {
- n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
+ n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
/*
* Convert any successful completions to flush
@@ -676,14 +773,13 @@ void ipoib_drain_cq(struct net_device *dev)
ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
else
ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
- } else {
- if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
- ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
- else
- ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
- }
+ } else
+ ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
}
} while (n == IPOIB_NUM_WC);
+
+ while (poll_tx(priv))
+ ; /* nothing */
}
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
@@ -733,10 +829,8 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
rx_req = &priv->rx_ring[i];
if (!rx_req->skb)
continue;
- ib_dma_unmap_single(priv->ca,
- rx_req->mapping,
- IPOIB_BUF_SIZE,
- DMA_FROM_DEVICE);
+ ipoib_ud_dma_unmap_rx(priv,
+ priv->rx_ring[i].mapping);
dev_kfree_skb_any(rx_req->skb);
rx_req->skb = NULL;
}
@@ -752,6 +846,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
ipoib_dbg(priv, "All sends and receives done.\n");
timeout:
+ del_timer_sync(&priv->poll_timer);
qp_attr.qp_state = IB_QPS_RESET;
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n");
@@ -775,7 +870,7 @@ timeout:
msleep(1);
}
- ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
+ ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
return 0;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index bd07f02cf02..2442090ac8d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -195,7 +195,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
- if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
+ if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
return -EINVAL;
priv->admin_mtu = new_mtu;
@@ -971,10 +971,6 @@ static void ipoib_setup(struct net_device *dev)
NETIF_F_LLTX |
NETIF_F_HIGHDMA);
- /* MTU will be reset when mcast join happens */
- dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
- priv->mcast_mtu = priv->admin_mtu = dev->mtu;
-
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
netif_carrier_off(dev);
@@ -1107,6 +1103,7 @@ static struct net_device *ipoib_add_port(const char *format,
{
struct ipoib_dev_priv *priv;
struct ib_device_attr *device_attr;
+ struct ib_port_attr attr;
int result = -ENOMEM;
priv = ipoib_intf_alloc(format);
@@ -1115,6 +1112,18 @@ static struct net_device *ipoib_add_port(const char *format,
SET_NETDEV_DEV(priv->dev, hca->dma_device);
+ if (!ib_query_port(hca, port, &attr))
+ priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+ else {
+ printk(KERN_WARNING "%s: ib_query_port %d failed\n",
+ hca->name, port);
+ goto device_init_failed;
+ }
+
+ /* MTU will be reset when mcast join happens */
+ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
+ priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
+
result = ib_query_pkey(hca, port, 0, &priv->pkey);
if (result) {
printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
@@ -1289,7 +1298,8 @@ static int __init ipoib_init_module(void)
ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
- ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+ ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
+ IPOIB_MIN_QUEUE_SIZE));
#ifdef CONFIG_INFINIBAND_IPOIB_CM
ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 31a53c5bcb1..d00a2c174ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
return;
}
- priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
- IPOIB_ENCAP_LEN;
+ priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
if (!ipoib_cm_admin_enabled(dev))
dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 8a20e3742c4..8766d29ce3b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -150,7 +150,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
.max_send_sge = 1,
- .max_recv_sge = 1
+ .max_recv_sge = IPOIB_UD_RX_SG
},
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_UD
@@ -171,26 +171,34 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_pd;
}
- size = ipoib_sendq_size + ipoib_recvq_size + 1;
+ size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
+ size += ipoib_sendq_size;
if (ipoib_cm_has_srq(dev))
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
}
- priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
- if (IS_ERR(priv->cq)) {
- printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
+ priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
+ if (IS_ERR(priv->recv_cq)) {
+ printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
goto out_free_mr;
}
- if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
- goto out_free_cq;
+ priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
+ dev, ipoib_sendq_size, 0);
+ if (IS_ERR(priv->send_cq)) {
+ printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+ goto out_free_recv_cq;
+ }
+
+ if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
+ goto out_free_send_cq;
- init_attr.send_cq = priv->cq;
- init_attr.recv_cq = priv->cq;
+ init_attr.send_cq = priv->send_cq;
+ init_attr.recv_cq = priv->recv_cq;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
@@ -201,7 +209,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
- goto out_free_cq;
+ goto out_free_send_cq;
}
priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -215,10 +223,26 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->tx_wr.sg_list = priv->tx_sge;
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
+ priv->rx_sge[0].lkey = priv->mr->lkey;
+ if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+ priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
+ priv->rx_sge[1].length = PAGE_SIZE;
+ priv->rx_sge[1].lkey = priv->mr->lkey;
+ priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
+ } else {
+ priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+ priv->rx_wr.num_sge = 1;
+ }
+ priv->rx_wr.next = NULL;
+ priv->rx_wr.sg_list = priv->rx_sge;
+
return 0;
-out_free_cq:
- ib_destroy_cq(priv->cq);
+out_free_send_cq:
+ ib_destroy_cq(priv->send_cq);
+
+out_free_recv_cq:
+ ib_destroy_cq(priv->recv_cq);
out_free_mr:
ib_dereg_mr(priv->mr);
@@ -241,8 +265,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
- if (ib_destroy_cq(priv->cq))
- ipoib_warn(priv, "ib_cq_destroy failed\n");
+ if (ib_destroy_cq(priv->send_cq))
+ ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
+
+ if (ib_destroy_cq(priv->recv_cq))
+ ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
ipoib_cm_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 293f5b892e3..1cdb5cfb0ff 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -89,6 +89,10 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
goto err;
}
+ priv->max_ib_mtu = ppriv->max_ib_mtu;
+ /* MTU will be reset when mcast join happens */
+ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
+ priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
priv->pkey = pkey;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index be1b9fbd416..aeb58cae9a3 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -473,13 +473,15 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
- stats->custom_length = 3;
+ stats->custom_length = 4;
strcpy(stats->custom[0].desc, "qp_tx_queue_full");
stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */
strcpy(stats->custom[1].desc, "fmr_map_not_avail");
stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */;
strcpy(stats->custom[2].desc, "eh_abort_cnt");
stats->custom[2].value = conn->eh_abort_cnt;
+ strcpy(stats->custom[3].desc, "fmr_unalign_cnt");
+ stats->custom[3].value = conn->fmr_unalign_cnt;
}
static int
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 1ee867b1b34..a8c1b300e34 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -71,6 +71,13 @@
#define iser_dbg(fmt, arg...) \
do { \
+ if (iser_debug_level > 1) \
+ printk(KERN_DEBUG PFX "%s:" fmt,\
+ __func__ , ## arg); \
+ } while (0)
+
+#define iser_warn(fmt, arg...) \
+ do { \
if (iser_debug_level > 0) \
printk(KERN_DEBUG PFX "%s:" fmt,\
__func__ , ## arg); \
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 4a17743a639..cac50c4dc15 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -334,8 +334,11 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
struct scatterlist *sg;
int i;
+ if (iser_debug_level == 0)
+ return;
+
for_each_sg(sgl, sg, data->dma_nents, i)
- iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
+ iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
sg_page(sg), sg->offset,
@@ -420,6 +423,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
enum iser_data_dir cmd_dir)
{
+ struct iscsi_conn *iscsi_conn = iser_ctask->iser_conn->iscsi_conn;
struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
@@ -434,7 +438,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
- iser_err("rdma alignment violation %d/%d aligned\n",
+ iscsi_conn->fmr_unalign_cnt++;
+ iser_warn("rdma alignment violation %d/%d aligned\n",
aligned_len, mem->size);
iser_data_buf_dump(mem, ibdev);