summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ehca
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ehca')
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h16
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c44
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c83
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c240
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c211
7 files changed, 525 insertions, 78 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1ab919f836a..4df887af66a 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -128,6 +128,8 @@ struct ehca_shca {
/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
u32 hca_cap_mr_pgsize;
int max_mtu;
+ int max_num_qps;
+ int max_num_cqs;
atomic_t num_cqs;
atomic_t num_qps;
};
@@ -164,6 +166,13 @@ struct ehca_qmap_entry {
u16 reported;
};
+struct ehca_queue_map {
+ struct ehca_qmap_entry *map;
+ unsigned int entries;
+ unsigned int tail;
+ unsigned int left_to_poll;
+};
+
struct ehca_qp {
union {
struct ib_qp ib_qp;
@@ -173,8 +182,9 @@ struct ehca_qp {
enum ehca_ext_qp_type ext_type;
enum ib_qp_state state;
struct ipz_queue ipz_squeue;
- struct ehca_qmap_entry *sq_map;
+ struct ehca_queue_map sq_map;
struct ipz_queue ipz_rqueue;
+ struct ehca_queue_map rq_map;
struct h_galpas galpas;
u32 qkey;
u32 real_qp_num;
@@ -204,6 +214,8 @@ struct ehca_qp {
atomic_t nr_events; /* events seen */
wait_queue_head_t wait_completion;
int mig_armed;
+ struct list_head sq_err_node;
+ struct list_head rq_err_node;
};
#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -233,6 +245,8 @@ struct ehca_cq {
/* mmap counter for resources mapped into user space */
u32 mm_count_queue;
u32 mm_count_galpa;
+ struct list_head sqp_err_list;
+ struct list_head rqp_err_list;
};
enum ehca_mr_flag {
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 5540b276a33..2f4c28a3027 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -132,9 +132,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
return ERR_PTR(-EINVAL);
- if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) {
+ if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
ehca_err(device, "Unable to create CQ, max number of %i "
- "CQs reached.", ehca_max_cq);
+ "CQs reached.", shca->max_num_cqs);
ehca_err(device, "To increase the maximum number of CQs "
"use the number_of_cqs module parameter.\n");
return ERR_PTR(-ENOSPC);
@@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
for (i = 0; i < QP_HASHTAB_LEN; i++)
INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+ INIT_LIST_HEAD(&my_cq->sqp_err_list);
+ INIT_LIST_HEAD(&my_cq->rqp_err_list);
+
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index cb55be04442..757035ea246 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -359,36 +359,48 @@ static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
*old_attr = new_attr;
}
+/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
+static int replay_modify_qp(struct ehca_sport *sport)
+{
+ int aqp1_destroyed;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+
+ aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
+
+ if (sport->ibqp_sqp[IB_QPT_SMI])
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
+ if (!aqp1_destroyed)
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
+
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+
+ return aqp1_destroyed;
+}
+
static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
u8 spec_event;
struct ehca_sport *sport = &shca->sport[port - 1];
- unsigned long flags;
switch (ec) {
case 0x30: /* port availability change */
if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
- int suppress_event;
- /* replay modify_qp for sqps */
- spin_lock_irqsave(&sport->mod_sqp_lock, flags);
- suppress_event = !sport->ibqp_sqp[IB_QPT_GSI];
- if (sport->ibqp_sqp[IB_QPT_SMI])
- ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
- if (!suppress_event)
- ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
- spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-
- /* AQP1 was destroyed, ignore this event */
- if (suppress_event)
- break;
+ /* only replay modify_qp calls in autodetect mode;
+ * if AQP1 was destroyed, the port is already down
+ * again and we can drop the event.
+ */
+ if (ehca_nr_ports < 0)
+ if (replay_modify_qp(sport))
+ break;
sport->port_state = IB_PORT_ACTIVE;
dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
"is active");
- ehca_query_sma_attr(shca, port,
- &sport->saved_attr);
+ ehca_query_sma_attr(shca, port, &sport->saved_attr);
} else {
sport->port_state = IB_PORT_DOWN;
dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index a8a2ea585d2..8f7f282ead6 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
int ehca_calc_ipd(struct ehca_shca *shca, int port,
enum ib_rate path_rate, u32 *ipd);
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
+
#ifdef CONFIG_PPC_64K_PAGES
void *ehca_alloc_fw_ctrlblock(gfp_t flags);
void ehca_free_fw_ctrlblock(void *ptr);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 598844d2edc..bb02a86aa52 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -44,6 +44,8 @@
#include <linux/slab.h>
#endif
+#include <linux/notifier.h>
+#include <linux/memory.h>
#include "ehca_classes.h"
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
@@ -366,22 +368,23 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
/* Set maximum number of CQs and QPs to calculate EQ size */
- if (ehca_max_qp == -1)
- ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES);
- else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) {
- ehca_gen_err("Requested number of QPs is out of range (1 - %i) "
- "specified by HW", rblock->max_qp);
- ret = -EINVAL;
- goto sense_attributes1;
+ if (shca->max_num_qps == -1)
+ shca->max_num_qps = min_t(int, rblock->max_qp,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
+ ehca_gen_warn("The requested number of QPs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_qp, rblock->max_qp);
+ shca->max_num_qps = rblock->max_qp;
}
- if (ehca_max_cq == -1)
- ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES);
- else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) {
- ehca_gen_err("Requested number of CQs is out of range (1 - %i) "
- "specified by HW", rblock->max_cq);
- ret = -EINVAL;
- goto sense_attributes1;
+ if (shca->max_num_cqs == -1)
+ shca->max_num_cqs = min_t(int, rblock->max_cq,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
+ ehca_gen_warn("The requested number of CQs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_cq, rblock->max_cq);
}
/* query max MTU from first port -- it's the same for all ports */
@@ -733,9 +736,13 @@ static int __devinit ehca_probe(struct of_device *dev,
ehca_gen_err("Cannot allocate shca memory.");
return -ENOMEM;
}
+
mutex_init(&shca->modify_mutex);
atomic_set(&shca->num_cqs, 0);
atomic_set(&shca->num_qps, 0);
+ shca->max_num_qps = ehca_max_qp;
+ shca->max_num_cqs = ehca_max_cq;
+
for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
spin_lock_init(&shca->sport[i].mod_sqp_lock);
@@ -755,7 +762,7 @@ static int __devinit ehca_probe(struct of_device *dev,
goto probe1;
}
- eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp;
+ eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
/* create event queues */
ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
if (ret) {
@@ -964,6 +971,41 @@ void ehca_poll_eqs(unsigned long data)
spin_unlock(&shca_list_lock);
}
+static int ehca_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ static unsigned long ehca_dmem_warn_time;
+
+ switch (action) {
+ case MEM_CANCEL_OFFLINE:
+ case MEM_CANCEL_ONLINE:
+ case MEM_ONLINE:
+ case MEM_OFFLINE:
+ return NOTIFY_OK;
+ case MEM_GOING_ONLINE:
+ case MEM_GOING_OFFLINE:
+ /* only ok if no hca is attached to the lpar */
+ spin_lock(&shca_list_lock);
+ if (list_empty(&shca_list)) {
+ spin_unlock(&shca_list_lock);
+ return NOTIFY_OK;
+ } else {
+ spin_unlock(&shca_list_lock);
+ if (printk_timed_ratelimit(&ehca_dmem_warn_time,
+ 30 * 1000))
+ ehca_gen_err("DMEM operations are not allowed"
+ "as long as an ehca adapter is"
+ "attached to the LPAR");
+ return NOTIFY_BAD;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block ehca_mem_nb = {
+ .notifier_call = ehca_mem_notifier,
+};
+
static int __init ehca_module_init(void)
{
int ret;
@@ -991,6 +1033,12 @@ static int __init ehca_module_init(void)
goto module_init2;
}
+ ret = register_memory_notifier(&ehca_mem_nb);
+ if (ret) {
+ ehca_gen_err("Failed registering memory add/remove notifier");
+ goto module_init3;
+ }
+
if (ehca_poll_all_eqs != 1) {
ehca_gen_err("WARNING!!!");
ehca_gen_err("It is possible to lose interrupts.");
@@ -1003,6 +1051,9 @@ static int __init ehca_module_init(void)
return 0;
+module_init3:
+ ibmebus_unregister_driver(&ehca_driver);
+
module_init2:
ehca_destroy_slab_caches();
@@ -1018,6 +1069,8 @@ static void __exit ehca_module_exit(void)
ibmebus_unregister_driver(&ehca_driver);
+ unregister_memory_notifier(&ehca_mem_nb);
+
ehca_destroy_slab_caches();
ehca_destroy_comp_pool();
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index b6bcee03673..9e05ee2db39 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
queue->is_small = (queue->page_size != 0);
}
+/* needs to be called with cq->spinlock held */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+ struct list_head *list, *node;
+
+ /* TODO: support low latency QPs */
+ if (qp->ext_type == EQPT_LLQP)
+ return;
+
+ if (on_sq) {
+ list = &qp->send_cq->sqp_err_list;
+ node = &qp->sq_err_node;
+ } else {
+ list = &qp->recv_cq->rqp_err_list;
+ node = &qp->rq_err_node;
+ }
+
+ if (list_empty(node))
+ list_add_tail(node, list);
+
+ return;
+}
+
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->spinlock, flags);
+
+ if (!list_empty(node))
+ list_del_init(node);
+
+ spin_unlock_irqrestore(&cq->spinlock, flags);
+}
+
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+ int i;
+
+ qmap->tail = 0;
+ for (i = 0; i < qmap->entries; i++)
+ qmap->map[i].reported = 1;
+}
+
/*
* Create an ib_qp struct that is either a QP or an SRQ, depending on
* the value of the is_srq parameter. If init_attr and srq_init_attr share
@@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_qp(
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata, int is_srq)
{
- struct ehca_qp *my_qp;
+ struct ehca_qp *my_qp, *my_srq = NULL;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
ib_device);
struct ib_ucontext *context = NULL;
- u32 nr_qes;
u64 h_ret;
int is_llqp = 0, has_srq = 0;
int qp_type, max_send_sge, max_recv_sge, ret;
@@ -422,9 +465,9 @@ static struct ehca_qp *internal_create_qp(
u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
unsigned long flags;
- if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) {
+ if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
ehca_err(pd->device, "Unable to create QP, max number of %i "
- "QPs reached.", ehca_max_qp);
+ "QPs reached.", shca->max_num_qps);
ehca_err(pd->device, "To increase the maximum number of QPs "
"use the number_of_qps module parameter.\n");
return ERR_PTR(-ENOSPC);
@@ -457,8 +500,13 @@ static struct ehca_qp *internal_create_qp(
/* handle SRQ base QPs */
if (init_attr->srq) {
- struct ehca_qp *my_srq =
- container_of(init_attr->srq, struct ehca_qp, ib_srq);
+ my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+ if (qp_type == IB_QPT_UC) {
+ ehca_err(pd->device, "UC with SRQ not supported");
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
has_srq = 1;
parms.ext_type = EQPT_SRQBASE;
@@ -716,15 +764,19 @@ static struct ehca_qp *internal_create_qp(
"and pages ret=%i", ret);
goto create_qp_exit2;
}
- nr_qes = my_qp->ipz_squeue.queue_length /
+
+ my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
my_qp->ipz_squeue.qe_size;
- my_qp->sq_map = vmalloc(nr_qes *
+ my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
sizeof(struct ehca_qmap_entry));
- if (!my_qp->sq_map) {
+ if (!my_qp->sq_map.map) {
ehca_err(pd->device, "Couldn't allocate squeue "
"map ret=%i", ret);
goto create_qp_exit3;
}
+ INIT_LIST_HEAD(&my_qp->sq_err_node);
+ /* to avoid the generation of bogus flush CQEs */
+ reset_queue_map(&my_qp->sq_map);
}
if (HAS_RQ(my_qp)) {
@@ -736,6 +788,25 @@ static struct ehca_qp *internal_create_qp(
"and pages ret=%i", ret);
goto create_qp_exit4;
}
+
+ my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+ my_qp->ipz_rqueue.qe_size;
+ my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+ sizeof(struct ehca_qmap_entry));
+ if (!my_qp->rq_map.map) {
+ ehca_err(pd->device, "Couldn't allocate squeue "
+ "map ret=%i", ret);
+ goto create_qp_exit5;
+ }
+ INIT_LIST_HEAD(&my_qp->rq_err_node);
+ /* to avoid the generation of bogus flush CQEs */
+ reset_queue_map(&my_qp->rq_map);
+ } else if (init_attr->srq) {
+ /* this is a base QP, use the queue map of the SRQ */
+ my_qp->rq_map = my_srq->rq_map;
+ INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+ my_qp->ipz_rqueue = my_srq->ipz_rqueue;
}
if (is_srq) {
@@ -789,6 +860,11 @@ static struct ehca_qp *internal_create_qp(
if (qp_type == IB_QPT_GSI) {
h_ret = ehca_define_sqp(shca, my_qp, init_attr);
if (h_ret != H_SUCCESS) {
+ kfree(my_qp->mod_qp_parm);
+ my_qp->mod_qp_parm = NULL;
+ /* the QP pointer is no longer valid */
+ shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+ NULL;
ret = ehca2ib_return_code(h_ret);
goto create_qp_exit6;
}
@@ -799,7 +875,7 @@ static struct ehca_qp *internal_create_qp(
if (ret) {
ehca_err(pd->device,
"Couldn't assign qp to send_cq ret=%i", ret);
- goto create_qp_exit6;
+ goto create_qp_exit7;
}
}
@@ -825,25 +901,29 @@ static struct ehca_qp *internal_create_qp(
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
ret = -EINVAL;
- goto create_qp_exit7;
+ goto create_qp_exit8;
}
}
return my_qp;
-create_qp_exit7:
+create_qp_exit8:
ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
-create_qp_exit6:
+create_qp_exit7:
kfree(my_qp->mod_qp_parm);
+create_qp_exit6:
+ if (HAS_RQ(my_qp))
+ vfree(my_qp->rq_map.map);
+
create_qp_exit5:
if (HAS_RQ(my_qp))
ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
create_qp_exit4:
if (HAS_SQ(my_qp))
- vfree(my_qp->sq_map);
+ vfree(my_qp->sq_map.map);
create_qp_exit3:
if (HAS_SQ(my_qp))
@@ -1035,6 +1115,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
return 0;
}
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+ struct ehca_queue_map *qmap)
+{
+ void *wqe_v;
+ u64 q_ofs;
+ u32 wqe_idx;
+
+ /* convert real to abs address */
+ wqe_p = wqe_p & (~(1UL << 63));
+
+ wqe_v = abs_to_virt(wqe_p);
+
+ if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+ ehca_gen_err("Invalid offset for calculating left cqes "
+ "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v);
+ return -EFAULT;
+ }
+
+ wqe_idx = q_ofs / ipz_queue->qe_size;
+ if (wqe_idx < qmap->tail)
+ qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
+ else
+ qmap->left_to_poll = wqe_idx - qmap->tail;
+
+ return 0;
+}
+
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+ u64 h_ret;
+ void *send_wqe_p, *recv_wqe_p;
+ int ret;
+ unsigned long flags;
+ int qp_num = my_qp->ib_qp.qp_num;
+
+ /* this hcall is not supported on base QPs */
+ if (my_qp->ext_type != EQPT_SRQBASE) {
+ /* get send and receive wqe pointer */
+ h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle, &my_qp->pf,
+ &send_wqe_p, &recv_wqe_p, 4);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+ "failed ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, qp_num, h_ret);
+ return ehca2ib_return_code(h_ret);
+ }
+
+ /*
+ * acquire lock to ensure that nobody is polling the cq which
+ * could mean that the qmap->tail pointer is in an
+ * inconsistent state.
+ */
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+ &my_qp->sq_map);
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+ if (ret)
+ return ret;
+
+
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+ &my_qp->rq_map);
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+ if (ret)
+ return ret;
+ } else {
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ my_qp->sq_map.left_to_poll = 0;
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ my_qp->rq_map.left_to_poll = 0;
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+ }
+
+ /* this assures flush cqes being generated only for pending wqes */
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ ehca_add_to_err_list(my_qp, 1);
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+ if (HAS_RQ(my_qp)) {
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ ehca_add_to_err_list(my_qp, 0);
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
+ flags);
+ }
+ }
+
+ return 0;
+}
+
/*
* internal_modify_qp with circumvention to handle aqp0 properly
* smi_reset2init indicates if this is an internal reset-to-init-call for
@@ -1539,10 +1714,27 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit2;
}
}
+ if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+ ret = check_for_left_cqes(my_qp, shca);
+ if (ret)
+ goto modify_qp_exit2;
+ }
if (statetrans == IB_QPST_ANY2RESET) {
ipz_qeit_reset(&my_qp->ipz_rqueue);
ipz_qeit_reset(&my_qp->ipz_squeue);
+
+ if (qp_cur_state == IB_QPS_ERR) {
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ if (HAS_RQ(my_qp))
+ del_from_err_list(my_qp->recv_cq,
+ &my_qp->rq_err_node);
+ }
+ reset_queue_map(&my_qp->sq_map);
+
+ if (HAS_RQ(my_qp))
+ reset_queue_map(&my_qp->rq_map);
}
if (attr_mask & IB_QP_QKEY)
@@ -1958,6 +2150,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
idr_remove(&ehca_qp_idr, my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ /*
+ * SRQs will never get into an error list and do not have a recv_cq,
+ * so we need to skip them here.
+ */
+ if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+ del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+ if (HAS_SQ(my_qp))
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
/* now wait until all pending events have completed */
wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
@@ -1983,7 +2185,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
if (qp_type == IB_QPT_GSI) {
struct ib_event event;
ehca_info(dev, "device %s: port %x is inactive.",
- shca->ib_device.name, port_num);
+ shca->ib_device.name, port_num);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
event.element.port_num = port_num;
@@ -1991,11 +2193,15 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
ib_dispatch_event(&event);
}
- if (HAS_RQ(my_qp))
+ if (HAS_RQ(my_qp)) {
ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+ vfree(my_qp->rq_map.map);
+ }
if (HAS_SQ(my_qp)) {
ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
- vfree(my_qp->sq_map);
+
+ vfree(my_qp->sq_map.map);
}
kmem_cache_free(qp_cache, my_qp);
atomic_dec(&shca->num_qps);
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 4426d82fe79..64928079eaf 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -53,9 +53,25 @@
/* in RC traffic, insert an empty RDMA READ every this many packets */
#define ACK_CIRC_THRESHOLD 2000000
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+ u64 ret;
+
+ ret = wr_id & ~QMAP_IDX_MASK;
+ ret |= idx & QMAP_IDX_MASK;
+
+ return ret;
+}
+
+static u16 get_app_wr_id(u64 wr_id)
+{
+ return wr_id & QMAP_IDX_MASK;
+}
+
static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
struct ehca_wqe *wqe_p,
- struct ib_recv_wr *recv_wr)
+ struct ib_recv_wr *recv_wr,
+ u32 rq_map_idx)
{
u8 cnt_ds;
if (unlikely((recv_wr->num_sge < 0) ||
@@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = recv_wr->wr_id;
+ wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
wqe_p->nr_of_data_seg = recv_wr->num_sge;
for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
u64 dma_length;
struct ehca_av *my_av;
u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+ struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
if (unlikely((send_wr->num_sge < 0) ||
(send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK;
- wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK;
+ wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
- qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK;
- qp->sq_map[sq_map_idx].reported = 0;
+ qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+ qmap_entry->reported = 0;
switch (send_wr->opcode) {
case IB_WR_SEND:
@@ -496,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
+ u32 rq_map_idx;
unsigned long flags;
+ struct ehca_qmap_entry *qmap_entry;
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
@@ -524,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp,
}
goto post_recv_exit0;
}
+ /*
+ * Get the index of the WQE in the recv queue. The same index
+ * is used for writing into the rq_map.
+ */
+ rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
/* write a RECV WQE into the QUEUE */
- ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+ ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+ rq_map_idx);
/*
* if something failed,
* reset the free entry pointer to the start value
@@ -540,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp,
}
goto post_recv_exit0;
}
+
+ qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+ qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+ qmap_entry->reported = 0;
+
wqe_cnt++;
} /* eof for cur_recv_wr */
@@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = {
/* internal function to poll one entry of cq */
static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
{
- int ret = 0;
+ int ret = 0, qmap_tail_idx;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
struct ehca_cqe *cqe;
struct ehca_qp *my_qp;
+ struct ehca_qmap_entry *qmap_entry;
+ struct ehca_queue_map *qmap;
int cqe_count = 0, is_error;
repoll:
@@ -674,27 +706,52 @@ repoll:
goto repoll;
wc->qp = &my_qp->ib_qp;
- if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) {
- struct ehca_qmap_entry *qmap_entry;
+ if (is_error) {
/*
- * We got a send completion and need to restore the original
- * wr_id.
+ * set left_to_poll to 0 because in error state, we will not
+ * get any additional CQEs
*/
- qmap_entry = &my_qp->sq_map[cqe->work_request_id &
- QMAP_IDX_MASK];
+ ehca_add_to_err_list(my_qp, 1);
+ my_qp->sq_map.left_to_poll = 0;
- if (qmap_entry->reported) {
- ehca_warn(cq->device, "Double cqe on qp_num=%#x",
- my_qp->real_qp_num);
- /* found a double cqe, discard it and read next one */
- goto repoll;
- }
- wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK;
- wc->wr_id |= qmap_entry->app_wr_id;
- qmap_entry->reported = 1;
- } else
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ my_qp->rq_map.left_to_poll = 0;
+ }
+
+ qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+ if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+ /* We got a send completion. */
+ qmap = &my_qp->sq_map;
+ else
/* We got a receive completion. */
- wc->wr_id = cqe->work_request_id;
+ qmap = &my_qp->rq_map;
+
+ qmap_entry = &qmap->map[qmap_tail_idx];
+ if (qmap_entry->reported) {
+ ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+ my_qp->real_qp_num);
+ /* found a double cqe, discard it and read next one */
+ goto repoll;
+ }
+
+ wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+ qmap_entry->reported = 1;
+
+ /* this is a proper completion, we need to advance the tail pointer */
+ if (++qmap->tail == qmap->entries)
+ qmap->tail = 0;
+
+ /* if left_to_poll is decremented to 0, add the QP to the error list */
+ if (qmap->left_to_poll > 0) {
+ qmap->left_to_poll--;
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ ehca_add_to_err_list(my_qp, 1);
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+ }
/* eval ib_wc_opcode */
wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -733,13 +790,88 @@ poll_cq_one_exit0:
return ret;
}
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+ struct ib_wc *wc, int num_entries,
+ struct ipz_queue *ipz_queue, int on_sq)
+{
+ int nr = 0;
+ struct ehca_wqe *wqe;
+ u64 offset;
+ struct ehca_queue_map *qmap;
+ struct ehca_qmap_entry *qmap_entry;
+
+ if (on_sq)
+ qmap = &my_qp->sq_map;
+ else
+ qmap = &my_qp->rq_map;
+
+ qmap_entry = &qmap->map[qmap->tail];
+
+ while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+ /* generate flush CQE */
+ memset(wc, 0, sizeof(*wc));
+
+ offset = qmap->tail * ipz_queue->qe_size;
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+ if (!wqe) {
+ ehca_err(cq->device, "Invalid wqe offset=%#lx on "
+ "qp_num=%#x", offset, my_qp->real_qp_num);
+ return nr;
+ }
+
+ wc->wr_id = replace_wr_id(wqe->work_request_id,
+ qmap_entry->app_wr_id);
+
+ if (on_sq) {
+ switch (wqe->optype) {
+ case WQE_OPTYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case WQE_OPTYPE_RDMAWRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case WQE_OPTYPE_RDMAREAD:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ default:
+ ehca_err(cq->device, "Invalid optype=%x",
+ wqe->optype);
+ return nr;
+ }
+ } else
+ wc->opcode = IB_WC_RECV;
+
+ if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+ wc->ex.imm_data = wqe->immediate_data;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ }
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+
+ wc->qp = &my_qp->ib_qp;
+
+ /* mark as reported and advance tail pointer */
+ qmap_entry->reported = 1;
+ if (++qmap->tail == qmap->entries)
+ qmap->tail = 0;
+ qmap_entry = &qmap->map[qmap->tail];
+
+ wc++; nr++;
+ }
+
+ return nr;
+
+}
+
int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int nr;
+ struct ehca_qp *err_qp;
struct ib_wc *current_wc = wc;
int ret = 0;
unsigned long flags;
+ int entries_left = num_entries;
if (num_entries < 1) {
ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
}
spin_lock_irqsave(&my_cq->spinlock, flags);
- for (nr = 0; nr < num_entries; nr++) {
+
+ /* generate flush cqes for send queues */
+ list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_squeue, 1);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ /* generate flush cqes for receive queues */
+ list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_rqueue, 0);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ for (nr = 0; nr < entries_left; nr++) {
ret = ehca_poll_cq_one(cq, current_wc);
if (ret)
break;
current_wc++;
} /* eof for nr */
+ entries_left -= nr;
+
spin_unlock_irqrestore(&my_cq->spinlock, flags);
if (ret == -EAGAIN || !ret)
- ret = nr;
+ ret = num_entries - entries_left;
poll_cq_exit0:
return ret;