From bf31a1a02eb28d9bda0bb74345df7889faeb7335 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 13 May 2009 16:52:40 -0700 Subject: IB/ehca: Replace vmalloc() with kmalloc() for queue allocation To improve performance of driver resource allocation, replace vmalloc() calls with kmalloc(). Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ipz_pt_fn.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index c3a32846543..a2605593ae7 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -220,7 +220,7 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, queue->small_page = NULL; /* allocate queue page pointers */ - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); if (!queue->queue_pages) { ehca_gen_err("Couldn't allocate queue page list"); return 0; @@ -240,7 +240,7 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - vfree(queue->queue_pages); + kfree(queue->queue_pages); return 0; } @@ -262,7 +262,7 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) free_page((unsigned long)queue->queue_pages[i]); } - vfree(queue->queue_pages); + kfree(queue->queue_pages); return 1; } -- cgit v1.2.3-70-g09d2 From c94f156f63c835ffc02b686f9d4238b106f31a5d Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Wed, 13 May 2009 16:52:42 -0700 Subject: IB/ehca: Fall back to vmalloc() for big allocations In case of large queue pairs there is the possibility of allocation failures due to memory fragmentation when using kmalloc(). 
To ensure the memory is allocated even if kmalloc() can not find chunks which are big enough, we fall back to allocating the memory with vmalloc(). Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ipz_pt_fn.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index a2605593ae7..1227c593627 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -222,8 +222,11 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, /* allocate queue page pointers */ queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL); if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; + queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } } memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); @@ -240,7 +243,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - kfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 0; } @@ -262,7 +268,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) free_page((unsigned long)queue->queue_pages[i]); } - kfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 1; } -- cgit v1.2.3-70-g09d2 From 1988d1fa1a9d642c5714a6afc9775fba0627f3ed Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Wed, 13 May 2009 16:52:43 -0700 Subject: IB/ehca: Remove unnecessary memory operations for userspace queue pairs The queue map for flush completion circumvention is 
only used for kernel space queue pairs. This patch skips the allocation of the queue maps in case the QP is created for userspace. In addition, this patch does not iomap the galpas for kernel usage if the queue pair is only used in userspace. These changes will improve the performance of creation of userspace queue pairs. Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_qp.c | 94 ++++++++++++++++++++--------------- drivers/infiniband/hw/ehca/hcp_if.c | 6 +-- drivers/infiniband/hw/ehca/hcp_if.h | 2 +- drivers/infiniband/hw/ehca/hcp_phyp.c | 11 ++-- drivers/infiniband/hw/ehca/hcp_phyp.h | 2 +- 5 files changed, 65 insertions(+), 50 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 00c10815971..ead4e718c08 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -461,7 +461,7 @@ static struct ehca_qp *internal_create_qp( ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int is_llqp = 0, has_srq = 0; + int is_llqp = 0, has_srq = 0, is_user = 0; int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ @@ -609,9 +609,6 @@ static struct ehca_qp *internal_create_qp( } } - if (pd->uobject && udata) - context = pd->uobject->context; - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); @@ -619,6 +616,11 @@ static struct ehca_qp *internal_create_qp( return ERR_PTR(-ENOMEM); } + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + atomic_set(&my_qp->nr_events, 0); init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); @@ -707,7 +709,7 @@ static struct ehca_qp *internal_create_qp( (parms.squeue.is_small || parms.rqueue.is_small); } - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); + h_ret = 
hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", h_ret); @@ -769,18 +771,20 @@ static struct ehca_qp *internal_create_qp( goto create_qp_exit2; } - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); } if (HAS_RQ(my_qp)) { @@ -792,20 +796,21 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit4; } - - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + 
reset_queue_map(&my_qp->rq_map); } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } else if (init_attr->srq) { + } else if (init_attr->srq && !is_user) { /* this is a base QP, use the queue map of the SRQ */ my_qp->rq_map = my_srq->rq_map; INIT_LIST_HEAD(&my_qp->rq_err_node); @@ -918,7 +923,7 @@ create_qp_exit7: kfree(my_qp->mod_qp_parm); create_qp_exit6: - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) vfree(my_qp->rq_map.map); create_qp_exit5: @@ -926,7 +931,7 @@ create_qp_exit5: ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit4: - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) vfree(my_qp->sq_map.map); create_qp_exit3: @@ -1244,6 +1249,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 update_mask; u64 h_ret; int bad_wqe_cnt = 0; + int is_user = 0; int squeue_locked = 0; unsigned long flags = 0; @@ -1266,6 +1272,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, ret = ehca2ib_return_code(h_ret); goto modify_qp_exit1; } + if (ibqp->uobject) + is_user = 1; qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); @@ -1728,7 +1736,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } } - if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { ret = check_for_left_cqes(my_qp, shca); if (ret) goto modify_qp_exit2; @@ -1738,16 +1747,17 @@ static int internal_modify_qp(struct ib_qp *ibqp, ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); - if (qp_cur_state == IB_QPS_ERR) { + if (qp_cur_state == IB_QPS_ERR && !is_user) { del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); if (HAS_RQ(my_qp)) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); } - reset_queue_map(&my_qp->sq_map); + if (!is_user) + reset_queue_map(&my_qp->sq_map); - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp) && !is_user) reset_queue_map(&my_qp->rq_map); } @@ 
-2138,10 +2148,12 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, int ret; u64 h_ret; u8 port_num; + int is_user = 0; enum ib_qp_type qp_type; unsigned long flags; if (uobject) { + is_user = 1; if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { ehca_err(dev, "Resources still referenced in " @@ -2168,10 +2180,10 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, * SRQs will never get into an error list and do not have a recv_cq, * so we need to skip them here. */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - if (HAS_SQ(my_qp)) + if (HAS_SQ(my_qp) && !is_user) del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); /* now wait until all pending events have completed */ @@ -2209,13 +2221,13 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - - vfree(my_qp->rq_map.map); + if (!is_user) + vfree(my_qp->rq_map.map); } if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - - vfree(my_qp->sq_map.map); + if (!is_user) + vfree(my_qp->sq_map.map); } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index d0ab0c0d5e9..4d5dc3304d4 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -284,7 +284,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, param->act_pages = (u32)outs[4]; if (ret == H_SUCCESS) - hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. 
ret=%lli", ret); @@ -293,7 +293,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms) + struct ehca_alloc_qp_parms *parms, int is_user) { u64 ret; u64 allocate_controls, max_r10_reg, r11, r12; @@ -359,7 +359,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); if (ret == H_SUCCESS) - hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); + hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lli", ret); diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 2c3c6e0ea5c..39c1c3618ec 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). 
*/ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms); + struct ehca_alloc_qp_parms *parms, int is_user); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 214821095cb..b3e0e72e8a7 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -54,12 +54,15 @@ int hcall_unmap_page(u64 mapaddr) return 0; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user) { - int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); - if (ret) - return ret; + if (!is_user) { + int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); + if (ret) + return ret; + } else + galpas->kernel.fw_handle = 0; galpas->user.fw_handle = paddr_user; diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h index 5305c2a3ed9..204227d5303 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -78,7 +78,7 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) *(volatile u64 __force *)addr = value; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user); int hcp_galpas_dtor(struct h_galpas *galpas); -- cgit v1.2.3-70-g09d2 From bde2cfaf8ff5511b4f434078554f89ff6cb677f2 Mon Sep 17 00:00:00 2001 From: Stefan Roscher Date: Wed, 13 May 2009 16:52:43 -0700 Subject: IB/ehca: Increment version number Signed-off-by: Stefan Roscher Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 
368311ce332..85905ab9391 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0026" +#define HCAD_VERSION "0027" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); -- cgit v1.2.3-70-g09d2 From 5b891a9332dc4212bf166a4506092fbcd60f2319 Mon Sep 17 00:00:00 2001 From: Jack Stone Date: Wed, 13 May 2009 16:53:39 -0700 Subject: infiniband: Remove void casts Remove unneeded casts of void *. Signed-off-by: Jack Stone Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_cq.c | 4 ++-- drivers/infiniband/hw/ehca/ehca_irq.c | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index bb17cce3cb5..f5c45b194f5 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -133,7 +133,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev, struct c2_qp *qp; int is_recv = 0; - ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + ce = c2_mq_consume(&cq->mq); if (!ce) { return -EAGAIN; } @@ -146,7 +146,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev, while ((qp = (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) { c2_mq_free(&cq->mq); - ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq); + ce = c2_mq_consume(&cq->mq); if (!ce) return -EAGAIN; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 99bcbd7ffb0..4b89b791be6 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -479,13 +479,13 @@ void ehca_tasklet_neq(unsigned long data) struct ehca_eqe *eqe; u64 ret; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); while (eqe) { if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) parse_ec(shca, eqe->entry); 
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); } ret = hipz_h_reset_event(shca->ipz_hca_handle, @@ -572,8 +572,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_cnt = 0; do { u32 token; - eqe_cache[eqe_cnt].eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, eq); + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); if (!eqe_cache[eqe_cnt].eqe) break; eqe_value = eqe_cache[eqe_cnt].eqe->entry; @@ -637,7 +636,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) goto unlock_irq_spinlock; do { struct ehca_eqe *eqe; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + eqe = ehca_poll_eq(shca, &shca->eq); if (!eqe) break; process_eqe(shca, eqe); -- cgit v1.2.3-70-g09d2 From 28e43a519b9edb8277fc6b490ad17aa38c45a02b Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 15 May 2009 10:16:45 -0700 Subject: RDMA/nes: Fix off-by-one bugs in reset_adapter_ne020() and init_serdes() With a postfix increment, i is incremented one past 10K/5K before the loop ends, so the error messages will be displayed too soon if the test succeeds on the last iteration. Fix the comparisons to be > instead of >=. 
Signed-off-by: Roel Kluin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_hw.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index b832a7b814a..4a84d02ece0 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -667,7 +667,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n"); return 0; } @@ -675,7 +675,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); return 0; @@ -701,7 +701,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n"); return 0; } @@ -711,7 +711,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 5000) { nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp); return 0; } @@ -722,7 +722,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) & 0x0000000f)) != 0x0000000f) && i++ < 5000) 
mdelay(1); - if (i >= 5000) { + if (i > 5000) { nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp); return 0; } @@ -792,7 +792,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 5000) { nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp); return 1; } @@ -815,7 +815,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) & 0x0000000f)) != 0x0000000f) && (i++ < 5000)) mdelay(1); - if (i >= 5000) { + if (i > 5000) { printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp); /* return 1; */ } -- cgit v1.2.3-70-g09d2 From 10eb0f013c63c71c82ede77945a5f390c10cfda6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:38 -0500 Subject: [SCSI] iscsi: pass ep connect shost When we create the tcp/ip connection by calling ep_connect, we currently just go by the routing table info. I think there are two problems with this. 1. Some drivers do not have access to a routing table. Some drivers like qla4xxx do not even know about other ports. 2. If you have two initiator ports on the same subnet, the user may have set things up so that session1 was supposed to be run through port1. and session2 was supposed to be run through port2. It looks like we could end with both sessions going through one of the ports. Fixes for cxgb3i from Karen Xie. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 3 +- drivers/scsi/cxgb3i/cxgb3i.h | 1 - drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 25 +++++++++++++--- drivers/scsi/cxgb3i/cxgb3i_offload.c | 23 ++++++++------ drivers/scsi/cxgb3i/cxgb3i_offload.h | 3 +- drivers/scsi/scsi_transport_iscsi.c | 51 +++++++++++++++++++++++++------- include/scsi/iscsi_if.h | 7 ++++- include/scsi/scsi_transport_iscsi.h | 3 +- 8 files changed, 87 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 75223f50de5..ffbe0c76bc1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -517,7 +517,8 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s } static struct iscsi_endpoint * -iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking) +iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, + int non_blocking) { int err; struct iser_conn *ib_conn; diff --git a/drivers/scsi/cxgb3i/cxgb3i.h b/drivers/scsi/cxgb3i/cxgb3i.h index 59b0958d2d1..e3133b58e59 100644 --- a/drivers/scsi/cxgb3i/cxgb3i.h +++ b/drivers/scsi/cxgb3i/cxgb3i.h @@ -144,7 +144,6 @@ struct cxgb3i_adapter *cxgb3i_adapter_find_by_tdev(struct t3cdev *); void cxgb3i_adapter_open(struct t3cdev *); void cxgb3i_adapter_close(struct t3cdev *); -struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *); struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *, struct net_device *); void cxgb3i_hba_host_remove(struct cxgb3i_hba *); diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c index 9212400b9b1..04a43744aed 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -178,7 +178,7 @@ void cxgb3i_adapter_close(struct t3cdev *t3dev) * cxgb3i_hba_find_by_netdev - find the cxgb3i_hba structure 
via net_device * @t3dev: t3cdev adapter */ -struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) +static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) { struct cxgb3i_adapter *snic; int i; @@ -261,20 +261,27 @@ void cxgb3i_hba_host_remove(struct cxgb3i_hba *hba) /** * cxgb3i_ep_connect - establish TCP connection to target portal + * @shost: scsi host to use * @dst_addr: target IP address * @non_blocking: blocking or non-blocking call * * Initiates a TCP/IP connection to the dst_addr */ -static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr, +static struct iscsi_endpoint *cxgb3i_ep_connect(struct Scsi_Host *shost, + struct sockaddr *dst_addr, int non_blocking) { struct iscsi_endpoint *ep; struct cxgb3i_endpoint *cep; - struct cxgb3i_hba *hba; + struct cxgb3i_hba *hba = NULL; struct s3_conn *c3cn = NULL; int err = 0; + if (shost) + hba = iscsi_host_priv(shost); + + cxgb3i_api_debug("shost 0x%p, hba 0x%p.\n", shost, hba); + c3cn = cxgb3i_c3cn_create(); if (!c3cn) { cxgb3i_log_info("ep connect OOM.\n"); @@ -282,17 +289,27 @@ static struct iscsi_endpoint *cxgb3i_ep_connect(struct sockaddr *dst_addr, goto release_conn; } - err = cxgb3i_c3cn_connect(c3cn, (struct sockaddr_in *)dst_addr); + err = cxgb3i_c3cn_connect(hba ? 
hba->ndev : NULL, c3cn, + (struct sockaddr_in *)dst_addr); if (err < 0) { cxgb3i_log_info("ep connect failed.\n"); goto release_conn; } + hba = cxgb3i_hba_find_by_netdev(c3cn->dst_cache->dev); if (!hba) { err = -ENOSPC; cxgb3i_log_info("NOT going through cxgbi device.\n"); goto release_conn; } + + if (shost && hba != iscsi_host_priv(shost)) { + err = -ENOSPC; + cxgb3i_log_info("Could not connect through request host%u\n", + shost->host_no); + goto release_conn; + } + if (c3cn_is_closing(c3cn)) { err = -ENOSPC; cxgb3i_log_info("ep connect unable to connect.\n"); diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c index e11c9c180f3..c1d5be4adf9 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_offload.c +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c @@ -1479,12 +1479,13 @@ static struct net_device *cxgb3_egress_dev(struct net_device *root_dev, return NULL; } -static struct rtable *find_route(__be32 saddr, __be32 daddr, +static struct rtable *find_route(struct net_device *dev, + __be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { struct rtable *rt; struct flowi fl = { - .oif = 0, + .oif = dev ? dev->ifindex : 0, .nl_u = { .ip4_u = { .daddr = daddr, @@ -1573,36 +1574,40 @@ out_err: * * return 0 if active open request is sent, < 0 otherwise. 
*/ -int cxgb3i_c3cn_connect(struct s3_conn *c3cn, struct sockaddr_in *usin) +int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn, + struct sockaddr_in *usin) { struct rtable *rt; - struct net_device *dev; struct cxgb3i_sdev_data *cdata; struct t3cdev *cdev; __be32 sipv4; int err; + c3cn_conn_debug("c3cn 0x%p, dev 0x%p.\n", c3cn, dev); + if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; c3cn->daddr.sin_port = usin->sin_port; c3cn->daddr.sin_addr.s_addr = usin->sin_addr.s_addr; - rt = find_route(c3cn->saddr.sin_addr.s_addr, + rt = find_route(dev, c3cn->saddr.sin_addr.s_addr, c3cn->daddr.sin_addr.s_addr, c3cn->saddr.sin_port, c3cn->daddr.sin_port); if (rt == NULL) { - c3cn_conn_debug("NO route to 0x%x, port %u.\n", + c3cn_conn_debug("NO route to 0x%x, port %u, dev %s.\n", c3cn->daddr.sin_addr.s_addr, - ntohs(c3cn->daddr.sin_port)); + ntohs(c3cn->daddr.sin_port), + dev ? dev->name : "any"); return -ENETUNREACH; } if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { - c3cn_conn_debug("multi-cast route to 0x%x, port %u.\n", + c3cn_conn_debug("multi-cast route to 0x%x, port %u, dev %s.\n", c3cn->daddr.sin_addr.s_addr, - ntohs(c3cn->daddr.sin_port)); + ntohs(c3cn->daddr.sin_port), + dev ? 
dev->name : "any"); ip_rt_put(rt); return -ENETUNREACH; } diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.h b/drivers/scsi/cxgb3i/cxgb3i_offload.h index ebfca960c0a..6a1d86b1faf 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_offload.h +++ b/drivers/scsi/cxgb3i/cxgb3i_offload.h @@ -169,7 +169,8 @@ void cxgb3i_sdev_add(struct t3cdev *, struct cxgb3_client *); void cxgb3i_sdev_remove(struct t3cdev *); struct s3_conn *cxgb3i_c3cn_create(void); -int cxgb3i_c3cn_connect(struct s3_conn *, struct sockaddr_in *); +int cxgb3i_c3cn_connect(struct net_device *, struct s3_conn *, + struct sockaddr_in *); void cxgb3i_c3cn_rx_credits(struct s3_conn *, int); int cxgb3i_c3cn_send_pdus(struct s3_conn *, struct sk_buff *); void cxgb3i_c3cn_release(struct s3_conn *); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0a2ce7b6325..d69a53aa406 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1268,26 +1268,54 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) return err; } +static int iscsi_if_ep_connect(struct iscsi_transport *transport, + struct iscsi_uevent *ev, int msg_type) +{ + struct iscsi_endpoint *ep; + struct sockaddr *dst_addr; + struct Scsi_Host *shost = NULL; + int non_blocking, err = 0; + + if (!transport->ep_connect) + return -EINVAL; + + if (msg_type == ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST) { + shost = scsi_host_lookup(ev->u.ep_connect_through_host.host_no); + if (!shost) { + printk(KERN_ERR "ep connect failed. 
Could not find " + "host no %u\n", + ev->u.ep_connect_through_host.host_no); + return -ENODEV; + } + non_blocking = ev->u.ep_connect_through_host.non_blocking; + } else + non_blocking = ev->u.ep_connect.non_blocking; + + dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); + ep = transport->ep_connect(shost, dst_addr, non_blocking); + if (IS_ERR(ep)) { + err = PTR_ERR(ep); + goto release_host; + } + + ev->r.ep_connect_ret.handle = ep->id; +release_host: + if (shost) + scsi_host_put(shost); + return err; +} + static int iscsi_if_transport_ep(struct iscsi_transport *transport, struct iscsi_uevent *ev, int msg_type) { struct iscsi_endpoint *ep; - struct sockaddr *dst_addr; int rc = 0; switch (msg_type) { + case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST: case ISCSI_UEVENT_TRANSPORT_EP_CONNECT: - if (!transport->ep_connect) - return -EINVAL; - - dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); - ep = transport->ep_connect(dst_addr, - ev->u.ep_connect.non_blocking); - if (IS_ERR(ep)) - return PTR_ERR(ep); - - ev->r.ep_connect_ret.handle = ep->id; + rc = iscsi_if_ep_connect(transport, ev, msg_type); break; case ISCSI_UEVENT_TRANSPORT_EP_POLL: if (!transport->ep_poll) @@ -1469,6 +1497,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case ISCSI_UEVENT_TRANSPORT_EP_CONNECT: case ISCSI_UEVENT_TRANSPORT_EP_POLL: case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT: + case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST: err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type); break; case ISCSI_UEVENT_TGT_DSCVR: diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index d0ed5226f8c..2c1a4af9eaf 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -50,7 +50,8 @@ enum iscsi_uevent_e { ISCSI_UEVENT_TGT_DSCVR = UEVENT_BASE + 15, ISCSI_UEVENT_SET_HOST_PARAM = UEVENT_BASE + 16, ISCSI_UEVENT_UNBIND_SESSION = UEVENT_BASE + 17, - ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 18, + ISCSI_UEVENT_CREATE_BOUND_SESSION = UEVENT_BASE + 
18, + ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST = UEVENT_BASE + 19, /* up events */ ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1, @@ -131,6 +132,10 @@ struct iscsi_uevent { struct msg_transport_connect { uint32_t non_blocking; } ep_connect; + struct msg_transport_connect_through_host { + uint32_t host_no; + uint32_t non_blocking; + } ep_connect_through_host; struct msg_transport_poll { uint64_t ep_handle; uint32_t timeout_ms; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 457588e1119..8cb7a31d996 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -126,7 +126,8 @@ struct iscsi_transport { int *index, int *age); void (*session_recovery_timedout) (struct iscsi_cls_session *session); - struct iscsi_endpoint *(*ep_connect) (struct sockaddr *dst_addr, + struct iscsi_endpoint *(*ep_connect) (struct Scsi_Host *shost, + struct sockaddr *dst_addr, int non_blocking); int (*ep_poll) (struct iscsi_endpoint *ep, int timeout_ms); void (*ep_disconnect) (struct iscsi_endpoint *ep); -- cgit v1.2.3-70-g09d2 From b3cd5050bf8eb32ceecee129cac7c59e6f1668c4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 13 May 2009 17:57:49 -0500 Subject: [SCSI] libiscsi: add task aborted state If a task did not complete normally due to a TMF, libiscsi will now complete the task with the state ISCSI_TASK_ABRT_TMF. Drivers like bnx2i that need to free resources if a command did not complete normally can then check the task state. If a driver does not need to send a special command if we have dropped the session then they can check for ISCSI_TASK_ABRT_SESS_RECOV. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 7 ++-- drivers/scsi/libiscsi.c | 60 +++++++++++++++++++------------- drivers/scsi/libiscsi_tcp.c | 4 +-- include/scsi/libiscsi.h | 2 ++ 4 files changed, 41 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index ffbe0c76bc1..0ba6ec87629 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -257,11 +257,8 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - /* - * mgmt tasks do not need special cleanup and we do not - * allocate anything in the init task callout - */ - if (!task->sc || task->state == ISCSI_TASK_PENDING) + /* mgmt tasks do not need special cleanup */ + if (!task->sc) return; if (iser_task->status == ISER_TASK_STATUS_STARTED) { diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index dafa054537f..b00be6c3efc 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -443,18 +443,20 @@ EXPORT_SYMBOL_GPL(iscsi_put_task); /** * iscsi_complete_task - finish a task * @task: iscsi cmd task + * @state: state to complete task with * * Must be called with session lock. 
*/ -static void iscsi_complete_task(struct iscsi_task *task) +static void iscsi_complete_task(struct iscsi_task *task, int state) { struct iscsi_conn *conn = task->conn; - if (task->state == ISCSI_TASK_COMPLETED) + if (task->state == ISCSI_TASK_COMPLETED || + task->state == ISCSI_TASK_ABRT_TMF || + task->state == ISCSI_TASK_ABRT_SESS_RECOV) return; WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); - - task->state = ISCSI_TASK_COMPLETED; + task->state = state; if (!list_empty(&task->running)) list_del_init(&task->running); @@ -478,6 +480,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err) { struct iscsi_conn *conn = task->conn; struct scsi_cmnd *sc; + int state; /* * if a command completes and we get a successful tmf response @@ -488,14 +491,20 @@ static void fail_scsi_task(struct iscsi_task *task, int err) if (!sc) return; - if (task->state == ISCSI_TASK_PENDING) + if (task->state == ISCSI_TASK_PENDING) { /* * cmd never made it to the xmit thread, so we should not count * the cmd in the sequencing */ conn->session->queued_cmdsn--; + /* it was never sent so just complete like normal */ + state = ISCSI_TASK_COMPLETED; + } else if (err == DID_TRANSPORT_DISRUPTED) + state = ISCSI_TASK_ABRT_SESS_RECOV; + else + state = ISCSI_TASK_ABRT_TMF; - sc->result = err; + sc->result = err << 16; if (!scsi_bidi_cmnd(sc)) scsi_set_resid(sc, scsi_bufflen(sc)); else { @@ -503,7 +512,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err) scsi_in(sc)->resid = scsi_in(sc)->length; } - iscsi_complete_task(task); + iscsi_complete_task(task, state); } static int iscsi_prep_mgmt_task(struct iscsi_conn *conn, @@ -731,7 +740,7 @@ out: ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n", sc, sc->result, task->itt); conn->scsirsp_pdus_cnt++; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); } /** @@ -769,7 +778,7 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, "[sc %p res %d itt 0x%x]\n", sc, sc->result, 
task->itt); conn->scsirsp_pdus_cnt++; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); } static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) @@ -990,7 +999,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, } iscsi_tmf_rsp(conn, hdr); - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); break; case ISCSI_OP_NOOP_IN: iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr); @@ -1008,7 +1017,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, goto recv_pdu; mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout); - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); break; default: rc = ISCSI_ERR_BAD_OPCODE; @@ -1020,7 +1029,7 @@ out: recv_pdu: if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen)) rc = ISCSI_ERR_CONN_FAILED; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); return rc; } EXPORT_SYMBOL_GPL(__iscsi_complete_pdu); @@ -1262,7 +1271,7 @@ check_mgmt: struct iscsi_task, running); list_del_init(&conn->task->running); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { - fail_scsi_task(conn->task, DID_IMM_RETRY << 16); + fail_scsi_task(conn->task, DID_IMM_RETRY); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); @@ -1273,7 +1282,7 @@ check_mgmt: conn->task = NULL; goto again; } else - fail_scsi_task(conn->task, DID_ABORT << 16); + fail_scsi_task(conn->task, DID_ABORT); continue; } rc = iscsi_xmit_task(conn); @@ -1469,7 +1478,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) prepd_reject: sc->scsi_done = NULL; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); reject: spin_unlock(&session->lock); ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n", @@ -1479,7 +1488,7 @@ reject: prepd_fault: sc->scsi_done = NULL; - iscsi_complete_task(task); + iscsi_complete_task(task, ISCSI_TASK_COMPLETED); fault: 
spin_unlock(&session->lock); ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n", @@ -1665,7 +1674,7 @@ static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun, ISCSI_DBG_SESSION(conn->session, "failing sc %p itt 0x%x state %d\n", task->sc, task->itt, task->state); - fail_scsi_task(task, error << 16); + fail_scsi_task(task, error); } } @@ -1868,7 +1877,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) } if (task->state == ISCSI_TASK_PENDING) { - fail_scsi_task(task, DID_ABORT << 16); + fail_scsi_task(task, DID_ABORT); goto success; } @@ -1899,7 +1908,7 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) * then sent more data for the cmd. */ spin_lock(&session->lock); - fail_scsi_task(task, DID_ABORT << 16); + fail_scsi_task(task, DID_ABORT); conn->tmf_state = TMF_INITIAL; spin_unlock(&session->lock); iscsi_start_tx(conn); @@ -2572,7 +2581,7 @@ static void fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) { struct iscsi_task *task; - int i; + int i, state; for (i = 0; i < conn->session->cmds_max; i++) { task = conn->session->cmds[i]; @@ -2585,7 +2594,11 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) ISCSI_DBG_SESSION(conn->session, "failing mgmt itt 0x%x state %d\n", task->itt, task->state); - iscsi_complete_task(task); + state = ISCSI_TASK_ABRT_SESS_RECOV; + if (task->state == ISCSI_TASK_PENDING) + state = ISCSI_TASK_COMPLETED; + iscsi_complete_task(task, state); + } } @@ -2642,10 +2655,7 @@ static void iscsi_start_session_recovery(struct iscsi_session *session, * flush queues. 
*/ spin_lock_bh(&session->lock); - if (flag == STOP_CONN_RECOVER) - fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED); - else - fail_scsi_tasks(conn, -1, DID_ERROR); + fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED); fail_mgmt_tasks(session, conn); spin_unlock_bh(&session->lock); mutex_unlock(&session->eh_mutex); diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index b84a1d853f2..2bc07090321 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -440,8 +440,8 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task) struct iscsi_tcp_task *tcp_task = task->dd_data; struct iscsi_r2t_info *r2t; - /* nothing to do for mgmt or pending tasks */ - if (!task->sc || task->state == ISCSI_TASK_PENDING) + /* nothing to do for mgmt */ + if (!task->sc) return; /* flush task's r2t queues */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index facae71183a..196525cd402 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -86,6 +86,8 @@ enum { ISCSI_TASK_COMPLETED, ISCSI_TASK_PENDING, ISCSI_TASK_RUNNING, + ISCSI_TASK_ABRT_TMF, /* aborted due to TMF */ + ISCSI_TASK_ABRT_SESS_RECOV, /* aborted due to session recovery */ }; struct iscsi_r2t_info { -- cgit v1.2.3-70-g09d2 From c1f67a88bf62fac0f4151c007b361199c2cd1988 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 27 May 2009 14:36:16 -0700 Subject: IB/mthca: Add module parameter for number of MTTs per segment The current MTT allocator uses kmalloc() to allocate a buffer for its buddy allocator, and thus is limited in the amount of MTT segments that it can control. As a result, the size of memory that can be registered is limited too. This patch uses a module parameter to control the number of MTT entries that each segment represents, allowing more memory to be registered with the same number of segments. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/hw/mthca/mthca_main.c | 17 ++++++++++++++--- drivers/infiniband/hw/mthca/mthca_mr.c | 16 ++++++++-------- drivers/infiniband/hw/mthca/mthca_profile.c | 4 ++-- 5 files changed, 26 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 6d55f9d748f..8c2ed994d54 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1059,7 +1059,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); if (mthca_is_memfree(dev)) dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), - MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE; + dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size; else dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 252590116df..9ef611f6dd3 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -159,6 +159,7 @@ struct mthca_limits { int reserved_eqs; int num_mpts; int num_mtt_segs; + int mtt_seg_size; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 1d83cf7caf3..13da9f1d24c 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -125,6 +125,10 @@ module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); MODULE_PARM_DESC(fmr_reserved_mtts, "number of memory translation table segments reserved for FMR"); +static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); +module_param_named(log_mtts_per_seg, 
log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + static char mthca_version[] __devinitdata = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -162,6 +166,7 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) int err; u8 status; + mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8; err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); if (err) { mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); @@ -460,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev, } /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ - mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, - dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size, + dma_get_cache_alignment()) / mdev->limits.mtt_seg_size; mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, - MTHCA_MTT_SEG_SIZE, + mdev->limits.mtt_seg_size, mdev->limits.num_mtt_segs, mdev->limits.reserved_mtts, 1, 0); @@ -1315,6 +1320,12 @@ static void __init mthca_validate_profile(void) printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n", hca_profile.fmr_reserved_mtts); } + + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). 
Using default - %d\n", + log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8)); + log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); + } } static int __init mthca_init(void) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 882e6b73591..d606edf1085 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -220,7 +220,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, mtt->buddy = buddy; mtt->order = 0; - for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1) ++mtt->order; mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); @@ -267,7 +267,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, while (list_len > 0) { mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtt->first_seg * dev->limits.mtt_seg_size + start_index * 8); mtt_entry[1] = 0; for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) @@ -326,7 +326,7 @@ static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, u64 __iomem *mtts; int i; - mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size + start_index * sizeof (u64); for (i = 0; i < list_len; ++i) mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT), @@ -345,10 +345,10 @@ static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, /* For Arbel, all MTTs must fit in the same page. 
*/ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); /* Require full segments */ - BUG_ON(s % MTHCA_MTT_SEG_SIZE); + BUG_ON(s % dev->limits.mtt_seg_size); mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + - s / MTHCA_MTT_SEG_SIZE, &dma_handle); + s / dev->limits.mtt_seg_size, &dma_handle); BUG_ON(!mtts); @@ -479,7 +479,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, if (mr->mtt) mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + mr->mtt->first_seg * dev->limits.mtt_seg_size); if (0) { mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); @@ -626,7 +626,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, goto err_out_table; } - mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size; if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, @@ -908,7 +908,7 @@ int mthca_init_mr_table(struct mthca_dev *dev) dev->mr_table.mtt_base); dev->mr_table.tavor_fmr.mtt_base = - ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE); + ioremap(addr, mtts * dev->limits.mtt_seg_size); if (!dev->mr_table.tavor_fmr.mtt_base) { mthca_warn(dev, "MTT ioremap for FMR failed.\n"); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index d168c254061..8edb28a9a0e 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -94,7 +94,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz; - profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE; + profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size; profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; 
profile[MTHCA_RES_UARC].size = request->uarc_size; @@ -232,7 +232,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, dev->limits.num_mtt_segs = profile[i].num; dev->mr_table.mtt_base = profile[i].start; init_hca->mtt_base = profile[i].start; - init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7; + init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7; break; case MTHCA_RES_UAR: dev->limits.num_uars = profile[i].num; -- cgit v1.2.3-70-g09d2 From 7ab1a2b31d4a8b4f519ccff5a84c53a5b87fd1be Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 27 May 2009 14:42:36 -0700 Subject: RDMA/cxgb3: Report correct port state and MTU Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 32 +++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 160ef482712..e2a63214008 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -1152,12 +1153,39 @@ static int iwch_query_device(struct ib_device *ibdev, static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + struct iwch_dev *dev; + struct net_device *netdev; + struct in_device *inetdev; + PDBG("%s ibdev %p\n", __func__, ibdev); + dev = to_iwch_dev(ibdev); + netdev = dev->rdev.port_info.lldevs[port-1]; + memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - props->active_mtu = IB_MTU_2048; - props->state = IB_PORT_ACTIVE; + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + + if 
(!netif_carrier_ok(netdev)) + props->state = IB_PORT_DOWN; + else { + inetdev = in_dev_get(netdev); + if (inetdev->ifa_list) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_INIT; + in_dev_put(inetdev); + } + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | -- cgit v1.2.3-70-g09d2 From 3026c19a14ba71ccd4dc4925abab9395ea12839c Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 27 May 2009 14:43:39 -0700 Subject: RDMA/cxgb3: Limit fast register size based on T3 limitations T3 firmware only supports one WRs worth of page list for fast register work requests. The driver currently allows 2 WRs worth, which doesn't work for T3, so reduce the limit in the driver. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/cxio_wr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index ff9be1a1310..32e3b1461d8 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -176,7 +176,7 @@ struct t3_send_wr { struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ }; -#define T3_MAX_FASTREG_DEPTH 24 +#define T3_MAX_FASTREG_DEPTH 10 #define T3_MAX_FASTREG_FRAG 10 struct t3_fastreg_wr { -- cgit v1.2.3-70-g09d2 From 25a52393270ca48c7d0848672ad4423313033c3d Mon Sep 17 00:00:00 2001 From: Joachim Fenkes Date: Wed, 3 Jun 2009 13:25:42 -0700 Subject: IB/ehca: Remove superfluous bitmasks from QP control block All the fields in the control block are nicely right-aligned, so no masking is necessary. 
Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 28 ----------------------- drivers/infiniband/hw/ehca/ehca_qp.c | 18 ++++----------- 2 files changed, 5 insertions(+), 41 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 1798e6466bd..689c35786dd 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -165,7 +165,6 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) #define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) #define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) #define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) #define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) @@ -176,60 +175,33 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) #define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) #define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_SOURCE_GID_IDX 
EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) #define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) #define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) #define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) #define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) #define 
MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) #define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index ead4e718c08..0338f1fabe8 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1962,19 +1962,13 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; qp_attr->dest_qp_num = qpcb->dest_qp_nr; - qp_attr->pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); - - qp_attr->port_num = - EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); - + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; qp_attr->timeout = qpcb->timeout; qp_attr->retry_cnt = qpcb->retry_count; qp_attr->rnr_retry = qpcb->rnr_retry_count; - qp_attr->alt_pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); - + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; @@ -2061,8 +2055,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = - EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->curr_srq_limit = attr->srq_limit; mqpcb->qp_aff_asyn_ev_log_reg = EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); } @@ -2125,8 +2118,7 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; srq_attr->max_sge = 3; - srq_attr->srq_limit = EHCA_BMASK_GET( - MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + srq_attr->srq_limit = 
qpcb->curr_srq_limit; if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); -- cgit v1.2.3-70-g09d2 From 2ac6bf4ddc87c3b6b609f8fa82f6ebbffeac12f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 5 Jun 2009 10:36:24 -0700 Subject: IB/mlx4: Add strong ordering to local inval and fast reg work requests The ConnectX Programmer's Reference Manual states that the "SO" bit must be set when posting Fast Register and Local Invalidate send work requests. When this bit is set, the work request will be executed only after all previous work requests on the send queue have been executed. (If the bit is not set, Fast Register and Local Invalidate WQEs may begin execution too early, which violates the defined semantics for these operations) This fixes the issue with NFS/RDMA reported in Signed-off-by: Jack Morgenstein Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 4 ++++ include/linux/mlx4/qp.h | 1 + 2 files changed, 5 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 20724aee76f..c4a02648c8a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1585,12 +1585,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_LOCAL_INV: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_local_inv_seg(wqe, wr->ex.invalidate_rkey); wqe += sizeof (struct mlx4_wqe_local_inval_seg); size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; case IB_WR_FAST_REG_MR: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_fmr_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_fmr_seg); size += sizeof (struct mlx4_wqe_fmr_seg) / 16; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index bf8f11982da..9f29d86e5dc 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -165,6 +165,7 @@ enum { MLX4_WQE_CTRL_IP_CSUM = 1 << 4, 
MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_VLAN = 1 << 6, + MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, }; struct mlx4_wqe_ctrl_seg { -- cgit v1.2.3-70-g09d2 From 9aa0a489d909af0cc36c41d3061ef956c7442ce2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 13 Jun 2009 15:14:09 -0700 Subject: IB/mthca: Don't double-free IRQs when falling back from MSI-X to INTx When both MSI-X and legacy INTx fail to generate an interrupt, the driver frees the MSI-X interrupts twice. Fix this by clearing the have_irq flag for the MSI-X interrupts when they are freed the first time. Reported-by: Yinghai Lu Tested-by: Yinghai Lu Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_eq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 28f0e0c40d7..90e4e450a12 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -641,9 +641,11 @@ static void mthca_free_irqs(struct mthca_dev *dev) if (dev->eq_table.have_irq) free_irq(dev->pdev->irq, dev); for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (dev->eq_table.eq[i].have_irq) + if (dev->eq_table.eq[i].have_irq) { free_irq(dev->eq_table.eq[i].msi_x_vector, dev->eq_table.eq + i); + dev->eq_table.eq[i].have_irq = 0; + } } static int mthca_map_reg(struct mthca_dev *dev, -- cgit v1.2.3-70-g09d2