diff options
Diffstat (limited to 'drivers/infiniband/hw/ehca')
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_classes.h | 16 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_cq.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_irq.c | 44 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_iverbs.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_main.c | 83 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_qp.c | 240 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_reqs.c | 211 |
7 files changed, 525 insertions, 78 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1ab919f836a..4df887af66a 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -128,6 +128,8 @@ struct ehca_shca { /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ u32 hca_cap_mr_pgsize; int max_mtu; + int max_num_qps; + int max_num_cqs; atomic_t num_cqs; atomic_t num_qps; }; @@ -164,6 +166,13 @@ struct ehca_qmap_entry { u16 reported; }; +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; +}; + struct ehca_qp { union { struct ib_qp ib_qp; @@ -173,8 +182,9 @@ struct ehca_qp { enum ehca_ext_qp_type ext_type; enum ib_qp_state state; struct ipz_queue ipz_squeue; - struct ehca_qmap_entry *sq_map; + struct ehca_queue_map sq_map; struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; struct h_galpas galpas; u32 qkey; u32 real_qp_num; @@ -204,6 +214,8 @@ struct ehca_qp { atomic_t nr_events; /* events seen */ wait_queue_head_t wait_completion; int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -233,6 +245,8 @@ struct ehca_cq { /* mmap counter for resources mapped into user space */ u32 mm_count_queue; u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; }; enum ehca_mr_flag { diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 5540b276a33..2f4c28a3027 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -132,9 +132,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); - if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) { + if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { ehca_err(device, "Unable to create CQ, max number of %i " - "CQs reached.", ehca_max_cq); + "CQs reached.", shca->max_num_cqs); ehca_err(device, "To increase the maximum number of CQs " "use the number_of_cqs module parameter.\n"); return ERR_PTR(-ENOSPC); @@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, for (i = 0; i < QP_HASHTAB_LEN; i++) INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index cb55be04442..757035ea246 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -359,36 +359,48 @@ static void notify_port_conf_change(struct ehca_shca *shca, int port_num) *old_attr = new_attr; } +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + static void parse_ec(struct ehca_shca *shca, u64 eqe) { u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); u8 spec_event; struct ehca_sport *sport = &shca->sport[port - 1]; - unsigned long flags; switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - int suppress_event; - /* replay modify_qp for sqps */ - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - suppress_event = !sport->ibqp_sqp[IB_QPT_GSI]; - if (sport->ibqp_sqp[IB_QPT_SMI]) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); - if (!suppress_event) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - - /* AQP1 was destroyed, ignore this event */ - if (suppress_event) - break; + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. + */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); - ehca_query_sma_attr(shca, port, - &sport->saved_attr); + ehca_query_sma_attr(shca, port, &sport->saved_attr); } else { sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index a8a2ea585d2..8f7f282ead6 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data); int ehca_calc_ipd(struct ehca_shca *shca, int port, enum ib_rate path_rate, u32 *ipd); +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 598844d2edc..bb02a86aa52 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -44,6 +44,8 @@ #include <linux/slab.h> #endif +#include <linux/notifier.h> +#include <linux/memory.h> #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" @@ -366,22 +368,23 @@ static int ehca_sense_attributes(struct ehca_shca *shca) shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; /* Set maximum number of CQs and QPs to calculate EQ size */ - if (ehca_max_qp == -1) - ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES); - else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) { - ehca_gen_err("Requested number of QPs is out of range (1 - %i) " - "specified by HW", rblock->max_qp); - ret = -EINVAL; - goto sense_attributes1; + if (shca->max_num_qps == -1) + shca->max_num_qps = min_t(int, rblock->max_qp, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { + ehca_gen_warn("The requested number of QPs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_qp, rblock->max_qp); + shca->max_num_qps = rblock->max_qp; } - if (ehca_max_cq == -1) - ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES); - else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) { - ehca_gen_err("Requested number of CQs is out of range (1 - %i) " - "specified by HW", rblock->max_cq); - ret = -EINVAL; - goto sense_attributes1; + if (shca->max_num_cqs == -1) + shca->max_num_cqs = min_t(int, rblock->max_cq, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { + ehca_gen_warn("The requested number of CQs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_cq, rblock->max_cq); } /* query max MTU from first port -- it's the same for all ports */ @@ -733,9 +736,13 @@ static int __devinit ehca_probe(struct of_device *dev, ehca_gen_err("Cannot allocate shca memory."); return -ENOMEM; } + mutex_init(&shca->modify_mutex); atomic_set(&shca->num_cqs, 0); atomic_set(&shca->num_qps, 0); + shca->max_num_qps = ehca_max_qp; + shca->max_num_cqs = ehca_max_cq; + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) spin_lock_init(&shca->sport[i].mod_sqp_lock); @@ -755,7 +762,7 @@ static int __devinit ehca_probe(struct of_device *dev, goto probe1; } - eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp; + eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; /* create event queues */ ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); if (ret) { @@ -964,6 +971,41 @@ void ehca_poll_eqs(unsigned long data) spin_unlock(&shca_list_lock); } +static int ehca_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + static unsigned long ehca_dmem_warn_time; + + switch (action) { + case MEM_CANCEL_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_ONLINE: + case MEM_OFFLINE: + return NOTIFY_OK; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + /* only ok if no hca is attached to the lpar */ + spin_lock(&shca_list_lock); + if (list_empty(&shca_list)) { + spin_unlock(&shca_list_lock); + return NOTIFY_OK; + } else { + spin_unlock(&shca_list_lock); + if (printk_timed_ratelimit(&ehca_dmem_warn_time, + 30 * 1000)) + ehca_gen_err("DMEM operations are not allowed" + "as long as an ehca adapter is" + "attached to the LPAR"); + return NOTIFY_BAD; + } + } + return NOTIFY_OK; +} + +static struct notifier_block ehca_mem_nb = { + .notifier_call = ehca_mem_notifier, +}; + static int __init ehca_module_init(void) { int ret; @@ -991,6 +1033,12 @@ static int __init ehca_module_init(void) goto module_init2; } + ret = register_memory_notifier(&ehca_mem_nb); + if (ret) { + ehca_gen_err("Failed registering memory add/remove notifier"); + goto module_init3; + } + if (ehca_poll_all_eqs != 1) { ehca_gen_err("WARNING!!!"); ehca_gen_err("It is possible to lose interrupts."); @@ -1003,6 +1051,9 @@ static int __init ehca_module_init(void) return 0; +module_init3: + ibmebus_unregister_driver(&ehca_driver); + module_init2: ehca_destroy_slab_caches(); @@ -1018,6 +1069,8 @@ static void __exit ehca_module_exit(void) ibmebus_unregister_driver(&ehca_driver); + unregister_memory_notifier(&ehca_mem_nb); + ehca_destroy_slab_caches(); ehca_destroy_comp_pool(); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index b6bcee03673..9e05ee2db39 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, queue->is_small = (queue->page_size != 0); } +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = 0; + for (i = 0; i < qmap->entries; i++) + qmap->map[i].reported = 1; +} + /* * Create an ib_qp struct that is either a QP or an SRQ, depending on * the value of the is_srq parameter. If init_attr and srq_init_attr share @@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_qp( struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata, int is_srq) { - struct ehca_qp *my_qp; + struct ehca_qp *my_qp, *my_srq = NULL; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ib_ucontext *context = NULL; - u32 nr_qes; u64 h_ret; int is_llqp = 0, has_srq = 0; int qp_type, max_send_sge, max_recv_sge, ret; @@ -422,9 +465,9 @@ static struct ehca_qp *internal_create_qp( u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; - if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) { + if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { ehca_err(pd->device, "Unable to create QP, max number of %i " - "QPs reached.", ehca_max_qp); + "QPs reached.", shca->max_num_qps); ehca_err(pd->device, "To increase the maximum number of QPs " "use the number_of_qps module parameter.\n"); return ERR_PTR(-ENOSPC); @@ -457,8 +500,13 @@ static struct ehca_qp *internal_create_qp( /* handle SRQ base QPs */ if (init_attr->srq) { - struct ehca_qp *my_srq = - container_of(init_attr->srq, struct ehca_qp, ib_srq); + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); + + if (qp_type == IB_QPT_UC) { + ehca_err(pd->device, "UC with SRQ not supported"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } has_srq = 1; parms.ext_type = EQPT_SRQBASE; @@ -716,15 +764,19 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit2; } - nr_qes = my_qp->ipz_squeue.queue_length / + + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / my_qp->ipz_squeue.qe_size; - my_qp->sq_map = vmalloc(nr_qes * + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map) { + if (!my_qp->sq_map.map) { ehca_err(pd->device, "Couldn't allocate squeue " "map ret=%i", ret); goto create_qp_exit3; } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); } if (HAS_RQ(my_qp)) { @@ -736,6 +788,25 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit4; } + + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } else if (init_attr->srq) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; } if (is_srq) { @@ -789,6 +860,11 @@ static struct ehca_qp *internal_create_qp( if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + /* the QP pointer is no longer valid */ + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + NULL; ret = ehca2ib_return_code(h_ret); goto create_qp_exit6; } @@ -799,7 +875,7 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit6; + goto create_qp_exit7; } } @@ -825,25 +901,29 @@ static struct ehca_qp *internal_create_qp( if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit7; + goto create_qp_exit8; } } return my_qp; -create_qp_exit7: +create_qp_exit8: ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); -create_qp_exit6: +create_qp_exit7: kfree(my_qp->mod_qp_parm); +create_qp_exit6: + if (HAS_RQ(my_qp)) + vfree(my_qp->rq_map.map); + create_qp_exit5: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit4: if (HAS_SQ(my_qp)) - vfree(my_qp->sq_map); + vfree(my_qp->sq_map.map); create_qp_exit3: if (HAS_SQ(my_qp)) @@ -1035,6 +1115,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, return 0; } +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = abs_to_virt(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + wqe_idx = q_ofs / ipz_queue->qe_size; + if (wqe_idx < qmap->tail) + qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx; + else + qmap->left_to_poll = wqe_idx - qmap->tail; + + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%li", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. + */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + /* * internal_modify_qp with circumvention to handle aqp0 properly * smi_reset2init indicates if this is an internal reset-to-init-call for @@ -1539,10 +1714,27 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } if (statetrans == IB_QPST_ANY2RESET) { ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp)) + reset_queue_map(&my_qp->rq_map); } if (attr_mask & IB_QP_QKEY) @@ -1958,6 +2150,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp)) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + /* now wait until all pending events have completed */ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); @@ -1983,7 +2185,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (qp_type == IB_QPT_GSI) { struct ib_event event; ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); + shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; event.element.port_num = port_num; @@ -1991,11 +2193,15 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, ib_dispatch_event(&event); } - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + + vfree(my_qp->rq_map.map); + } if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - vfree(my_qp->sq_map); + + vfree(my_qp->sq_map.map); } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 4426d82fe79..64928079eaf 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -53,9 +53,25 @@ /* in RC traffic, insert an empty RDMA READ every this many packets */ #define ACK_CIRC_THRESHOLD 2000000 +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr) + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) { u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || @@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { @@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, u64 dma_length; struct ehca_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { @@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK; - wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK; + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); - qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK; - qp->sq_map[sq_map_idx].reported = 0; + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; switch (send_wr->opcode) { case IB_WR_SEND: @@ -496,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; + u32 rq_map_idx; unsigned long flags; + struct ehca_qmap_entry *qmap_entry; if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", @@ -524,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr, + rq_map_idx); /* * if something failed, * reset the free entry pointer to the start value @@ -540,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id); + qmap_entry->reported = 0; + wqe_cnt++; } /* eof for cur_recv_wr */ @@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = { /* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) { - int ret = 0; + int ret = 0, qmap_tail_idx; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; int cqe_count = 0, is_error; repoll: @@ -674,27 +706,52 @@ repoll: goto repoll; wc->qp = &my_qp->ib_qp; - if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) { - struct ehca_qmap_entry *qmap_entry; + if (is_error) { /* - * We got a send completion and need to restore the original - * wr_id. + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs */ - qmap_entry = &my_qp->sq_map[cqe->work_request_id & - QMAP_IDX_MASK]; + ehca_add_to_err_list(my_qp, 1); + my_qp->sq_map.left_to_poll = 0; - if (qmap_entry->reported) { - ehca_warn(cq->device, "Double cqe on qp_num=%#x", - my_qp->real_qp_num); - /* found a double cqe, discard it and read next one */ - goto repoll; - } - wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK; - wc->wr_id |= qmap_entry->app_wr_id; - qmap_entry->reported = 1; - } else + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + my_qp->rq_map.left_to_poll = 0; + } + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else /* We got a receive completion. */ - wc->wr_id = cqe->work_request_id; + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* this is a proper completion, we need to advance the tail pointer */ + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } /* eval ib_wc_opcode */ wc->opcode = ib_wc_opcode[cqe->optype]-1; @@ -733,13 +790,88 @@ poll_cq_one_exit0: return ret; } +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->tail]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + memset(wc, 0, sizeof(*wc)); + + offset = qmap->tail * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#lx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance tail pointer */ + qmap_entry->reported = 1; + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + qmap_entry = &qmap->map[qmap->tail]; + + wc++; nr++; + } + + return nr; + +} + int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int nr; + struct ehca_qp *err_qp; struct ib_wc *current_wc = wc; int ret = 0; unsigned long flags; + int entries_left = num_entries; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) } spin_lock_irqsave(&my_cq->spinlock, flags); - for (nr = 0; nr < num_entries; nr++) { + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ + entries_left -= nr; + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) - ret = nr; + ret = num_entries - entries_left; poll_cq_exit0: return ret; |