summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorDmitry Torokhov <dtor@insightbb.com>2007-05-01 00:24:54 -0400
committerDmitry Torokhov <dtor@insightbb.com>2007-05-01 00:24:54 -0400
commitbc95f3669f5e6f63cf0b84fe4922c3c6dd4aa775 (patch)
tree427fcf2a7287c16d4b5aa6cbf494d59579a6a8b1 /drivers/infiniband
parent3d29cdff999c37b3876082278a8134a0642a02cd (diff)
parentdc87c3985e9b442c60994308a96f887579addc39 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: drivers/usb/input/Makefile drivers/usb/input/gtco.c
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/cm.c10
-rw-r--r--drivers/infiniband/core/cma.c363
-rw-r--r--drivers/infiniband/core/fmr_pool.c4
-rw-r--r--drivers/infiniband/core/iwcm.c47
-rw-r--r--drivers/infiniband/core/mad.c34
-rw-r--r--drivers/infiniband/core/multicast.c837
-rw-r--r--drivers/infiniband/core/sa.h66
-rw-r--r--drivers/infiniband/core/sa_query.c54
-rw-r--r--drivers/infiniband/core/smi.c86
-rw-r--r--drivers/infiniband/core/smi.h34
-rw-r--r--drivers/infiniband/core/sysfs.c3
-rw-r--r--drivers/infiniband/core/ucm.c23
-rw-r--r--drivers/infiniband/core/ucma.c228
-rw-r--r--drivers/infiniband/core/user_mad.c20
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c4
-rw-r--r--drivers/infiniband/core/verbs.c2
-rw-r--r--drivers/infiniband/hw/amso1100/c2.c6
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/Makefile1
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_dbg.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c45
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h6
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c15
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c55
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_ev.c13
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c49
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h35
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c34
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_user.h1
-rw-r--r--drivers/infiniband/hw/ehca/Kconfig8
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h26
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c16
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c58
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c336
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c37
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c24
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.h4
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.h11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h23
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_dma.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c123
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c287
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c16
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c154
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c75
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c86
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c100
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h10
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c14
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c12
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c133
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c920
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h22
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c63
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c16
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c15
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h57
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c10
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c21
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c22
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c122
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c14
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c24
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c206
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c13
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h1
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c17
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c40
87 files changed, 3673 insertions, 1631 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 50fb1cd447b..189e5d4b9b1 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
-ib_sa-y := sa_query.o
+ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index d446998b12a..842cd0b53e9 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -88,7 +88,6 @@ struct cm_port {
struct cm_device {
struct list_head list;
struct ib_device *device;
- __be64 ca_guid;
struct cm_port port[0];
};
@@ -739,8 +738,8 @@ retest:
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
- &cm_id_priv->av.port->cm_dev->ca_guid,
- sizeof cm_id_priv->av.port->cm_dev->ca_guid,
+ &cm_id_priv->id.device->node_guid,
+ sizeof cm_id_priv->id.device->node_guid,
NULL, 0);
break;
case IB_CM_REQ_RCVD:
@@ -883,7 +882,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
req_msg->local_comm_id = cm_id_priv->id.local_id;
req_msg->service_id = param->service_id;
- req_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;
+ req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
cm_req_set_resp_res(req_msg, param->responder_resources);
cm_req_set_init_depth(req_msg, param->initiator_depth);
@@ -1442,7 +1441,7 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
cm_rep_set_srq(rep_msg, param->srq);
- rep_msg->local_ca_guid = cm_id_priv->av.port->cm_dev->ca_guid;
+ rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
if (param->private_data && param->private_data_len)
memcpy(rep_msg->private_data, param->private_data,
@@ -3385,7 +3384,6 @@ static void cm_add_one(struct ib_device *device)
return;
cm_dev->device = device;
- cm_dev->ca_guid = device->node_guid;
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
for (i = 1; i <= device->phys_port_cnt; i++) {
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index db88e609bf4..fde92ce4515 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -71,12 +71,12 @@ static struct workqueue_struct *cma_wq;
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
+static DEFINE_IDR(ipoib_ps);
static int next_port;
struct cma_device {
struct list_head list;
struct ib_device *device;
- __be64 node_guid;
struct completion comp;
atomic_t refcount;
struct list_head id_list;
@@ -116,6 +116,7 @@ struct rdma_id_private {
struct list_head list;
struct list_head listen_list;
struct cma_device *cma_dev;
+ struct list_head mc_list;
enum cma_state state;
spinlock_t lock;
@@ -134,10 +135,23 @@ struct rdma_id_private {
} cm_id;
u32 seq_num;
+ u32 qkey;
u32 qp_num;
u8 srq;
};
+struct cma_multicast {
+ struct rdma_id_private *id_priv;
+ union {
+ struct ib_sa_multicast *ib;
+ } multicast;
+ struct list_head list;
+ void *context;
+ struct sockaddr addr;
+ u8 pad[sizeof(struct sockaddr_in6) -
+ sizeof(struct sockaddr)];
+};
+
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
@@ -243,6 +257,11 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
}
+static inline int cma_is_ud_ps(enum rdma_port_space ps)
+{
+ return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
+}
+
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -265,19 +284,41 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
id_priv->cma_dev = NULL;
}
+static int cma_set_qkey(struct ib_device *device, u8 port_num,
+ enum rdma_port_space ps,
+ struct rdma_dev_addr *dev_addr, u32 *qkey)
+{
+ struct ib_sa_mcmember_rec rec;
+ int ret = 0;
+
+ switch (ps) {
+ case RDMA_PS_UDP:
+ *qkey = RDMA_UDP_QKEY;
+ break;
+ case RDMA_PS_IPOIB:
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
+ *qkey = be32_to_cpu(rec.qkey);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
- enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
union ib_gid gid;
int ret = -ENODEV;
- switch (rdma_node_get_transport(dev_type)) {
+ switch (rdma_node_get_transport(dev_addr->dev_type)) {
case RDMA_TRANSPORT_IB:
- ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ ib_addr_get_sgid(dev_addr, &gid);
break;
case RDMA_TRANSPORT_IWARP:
- iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ iw_addr_get_sgid(dev_addr, &gid);
break;
default:
return -ENODEV;
@@ -287,7 +328,12 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
if (!ret) {
- cma_attach_to_dev(id_priv, cma_dev);
+ ret = cma_set_qkey(cma_dev->device,
+ id_priv->id.port_num,
+ id_priv->id.ps, dev_addr,
+ &id_priv->qkey);
+ if (!ret)
+ cma_attach_to_dev(id_priv, cma_dev);
break;
}
}
@@ -325,40 +371,50 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
init_waitqueue_head(&id_priv->wait_remove);
atomic_set(&id_priv->dev_remove, 0);
INIT_LIST_HEAD(&id_priv->listen_list);
+ INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);
-static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
struct ib_qp_attr qp_attr;
- struct rdma_dev_addr *dev_addr;
- int ret;
+ int qp_attr_mask, ret;
- dev_addr = &id_priv->id.route.addr.dev_addr;
- ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
- ib_addr_get_pkey(dev_addr),
- &qp_attr.pkey_index);
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
return ret;
- qp_attr.qp_state = IB_QPS_INIT;
- qp_attr.qp_access_flags = 0;
- qp_attr.port_num = id_priv->id.port_num;
- return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX | IB_QP_PORT);
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret)
+ return ret;
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+ if (ret)
+ return ret;
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ qp_attr.sq_psn = 0;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+
+ return ret;
}
-static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
qp_attr.qp_state = IB_QPS_INIT;
- qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ return ret;
- return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+ return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}
int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
@@ -376,18 +432,10 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
if (IS_ERR(qp))
return PTR_ERR(qp);
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- ret = cma_init_ib_qp(id_priv, qp);
- break;
- case RDMA_TRANSPORT_IWARP:
- ret = cma_init_iw_qp(id_priv, qp);
- break;
- default:
- ret = -ENOSYS;
- break;
- }
-
+ if (cma_is_ud_ps(id_priv->id.ps))
+ ret = cma_init_ud_qp(id_priv, qp);
+ else
+ ret = cma_init_conn_qp(id_priv, qp);
if (ret)
goto err;
@@ -460,23 +508,55 @@ static int cma_modify_qp_err(struct rdma_cm_id *id)
return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
}
+static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int ret;
+
+ ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
+ ib_addr_get_pkey(dev_addr),
+ &qp_attr->pkey_index);
+ if (ret)
+ return ret;
+
+ qp_attr->port_num = id_priv->id.port_num;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+ if (cma_is_ud_ps(id_priv->id.ps)) {
+ qp_attr->qkey = id_priv->qkey;
+ *qp_attr_mask |= IB_QP_QKEY;
+ } else {
+ qp_attr->qp_access_flags = 0;
+ *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
+ }
+ return 0;
+}
+
int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
int *qp_attr_mask)
{
struct rdma_id_private *id_priv;
- int ret;
+ int ret = 0;
id_priv = container_of(id, struct rdma_id_private, id);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
- qp_attr_mask);
+ if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
+ ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
+ else
+ ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
+ qp_attr_mask);
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
break;
case RDMA_TRANSPORT_IWARP:
- ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
- qp_attr_mask);
+ if (!id_priv->cm_id.iw) {
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+ } else
+ ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+ qp_attr_mask);
break;
default:
ret = -ENOSYS;
@@ -698,6 +778,19 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+ struct cma_multicast *mc;
+
+ while (!list_empty(&id_priv->mc_list)) {
+ mc = container_of(id_priv->mc_list.next,
+ struct cma_multicast, list);
+ list_del(&mc->list);
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ }
+}
+
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
@@ -722,6 +815,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
default:
break;
}
+ cma_leave_mc_groups(id_priv);
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
}
@@ -972,7 +1066,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id->id.ps);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
- if (listen_id->id.ps == RDMA_PS_UDP) {
+ if (cma_is_ud_ps(listen_id->id.ps)) {
conn_id = cma_new_udp_id(&listen_id->id, ib_event);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
@@ -1397,11 +1491,13 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
ib_addr_get_dgid(addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
path_rec.numb_path = 1;
+ path_rec.reversible = 1;
id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
id_priv->id.port_num, &path_rec,
IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
- IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
+ IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
+ IB_SA_PATH_REC_REVERSIBLE,
timeout_ms, GFP_KERNEL,
cma_query_handler, work, &id_priv->query);
@@ -1847,6 +1943,9 @@ static int cma_get_port(struct rdma_id_private *id_priv)
case RDMA_PS_UDP:
ps = &udp_ps;
break;
+ case RDMA_PS_IPOIB:
+ ps = &ipoib_ps;
+ break;
default:
return -EPROTONOSUPPORT;
}
@@ -1961,7 +2060,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
event.status = ib_event->param.sidr_rep_rcvd.status;
break;
}
- if (rep->qkey != RDMA_UD_QKEY) {
+ if (id_priv->qkey != rep->qkey) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = -EINVAL;
break;
@@ -2160,7 +2259,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id->ps == RDMA_PS_UDP)
+ if (cma_is_ud_ps(id->ps))
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
@@ -2256,7 +2355,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
rep.qp_num = id_priv->qp_num;
- rep.qkey = RDMA_UD_QKEY;
+ rep.qkey = id_priv->qkey;
}
rep.private_data = private_data;
rep.private_data_len = private_data_len;
@@ -2280,7 +2379,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id->ps == RDMA_PS_UDP)
+ if (cma_is_ud_ps(id->ps))
ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
conn_param->private_data,
conn_param->private_data_len);
@@ -2341,7 +2440,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id->ps == RDMA_PS_UDP)
+ if (cma_is_ud_ps(id->ps))
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
private_data, private_data_len);
else
@@ -2392,6 +2491,178 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_cm_event event;
+ int ret;
+
+ id_priv = mc->id_priv;
+ atomic_inc(&id_priv->dev_remove);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ goto out;
+
+ if (!status && id_priv->id.qp)
+ status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+ multicast->rec.mlid);
+
+ memset(&event, 0, sizeof event);
+ event.status = status;
+ event.param.ud.private_data = mc->context;
+ if (!status) {
+ event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num, &multicast->rec,
+ &event.param.ud.ah_attr);
+ event.param.ud.qp_num = 0xFFFFFF;
+ event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ } else
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ cma_release_remove(id_priv);
+ rdma_destroy_id(&id_priv->id);
+ return 0;
+ }
+out:
+ cma_release_remove(id_priv);
+ return 0;
+}
+
+static void cma_set_mgid(struct rdma_id_private *id_priv,
+ struct sockaddr *addr, union ib_gid *mgid)
+{
+ unsigned char mc_map[MAX_ADDR_LEN];
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *) addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
+
+ if (cma_any_addr(addr)) {
+ memset(mgid, 0, sizeof *mgid);
+ } else if ((addr->sa_family == AF_INET6) &&
+ ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+ 0xFF10A01B)) {
+ /* IPv6 address is an SA assigned MGID. */
+ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else {
+ ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
+ mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
+ *mgid = *(union ib_gid *) (mc_map + 4);
+ }
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct ib_sa_mcmember_rec rec;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ ib_sa_comp_mask comp_mask;
+ int ret;
+
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+ &rec.mgid, &rec);
+ if (ret)
+ return ret;
+
+ cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ ib_addr_get_sgid(dev_addr, &rec.port_gid);
+ rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+ rec.join_state = 1;
+
+ comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+ IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+ mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec,
+ comp_mask, GFP_KERNEL,
+ cma_ib_mc_handler, mc);
+ if (IS_ERR(mc->multicast.ib))
+ return PTR_ERR(mc->multicast.ib);
+
+ return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ return -EINVAL;
+
+ mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ memcpy(&mc->addr, addr, ip_addr_size(addr));
+ mc->context = context;
+ mc->id_priv = id_priv;
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_join_ib_multicast(id_priv, mc);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ if (ret) {
+ spin_lock_irq(&id_priv->lock);
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+ kfree(mc);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irq(&id_priv->lock);
+ list_for_each_entry(mc, &id_priv->mc_list, list) {
+ if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ if (id->qp)
+ ib_detach_mcast(id->qp,
+ &mc->multicast.ib->rec.mgid,
+ mc->multicast.ib->rec.mlid);
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ return;
+ }
+ }
+ spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
@@ -2402,7 +2673,6 @@ static void cma_add_one(struct ib_device *device)
return;
cma_dev->device = device;
- cma_dev->node_guid = device->node_guid;
init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
@@ -2522,6 +2792,7 @@ static void cma_cleanup(void)
idr_destroy(&sdp_ps);
idr_destroy(&tcp_ps);
idr_destroy(&udp_ps);
+ idr_destroy(&ipoib_ps);
}
module_init(cma_init);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 8926a2bd4a8..1d796e7c819 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -301,7 +301,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
{
struct ib_pool_fmr *fmr;
- struct ib_fmr_attr attr = {
+ struct ib_fmr_attr fmr_attr = {
.max_pages = params->max_pages_per_fmr,
.max_maps = pool->max_remaps,
.page_shift = params->page_shift
@@ -321,7 +321,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
fmr->ref_count = 0;
INIT_HLIST_NODE(&fmr->cache_node);
- fmr->fmr = ib_alloc_fmr(pd, params->access, &attr);
+ fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
if (IS_ERR(fmr->fmr)) {
printk(KERN_WARNING "fmr_create failed for FMR %d", i);
kfree(fmr);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 1039ad57d53..891d1fa7b2e 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -146,6 +146,12 @@ static int copy_private_data(struct iw_cm_event *event)
return 0;
}
+static void free_cm_id(struct iwcm_id_private *cm_id_priv)
+{
+ dealloc_work_entries(cm_id_priv);
+ kfree(cm_id_priv);
+}
+
/*
* Release a reference on cm_id. If the last reference is being
* released, enable the waiting thread (in iw_destroy_cm_id) to
@@ -153,21 +159,14 @@ static int copy_private_data(struct iw_cm_event *event)
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
- int ret = 0;
-
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
if (atomic_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
- if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
- BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
- BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
- &cm_id_priv->flags));
- ret = 1;
- }
complete(&cm_id_priv->destroy_comp);
+ return 1;
}
- return ret;
+ return 0;
}
static void add_ref(struct iw_cm_id *cm_id)
@@ -181,7 +180,11 @@ static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- iwcm_deref_id(cm_id_priv);
+ if (iwcm_deref_id(cm_id_priv) &&
+ test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+ BUG_ON(!list_empty(&cm_id_priv->work_list));
+ free_cm_id(cm_id_priv);
+ }
}
static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@@ -355,7 +358,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
case IW_CM_STATE_CONN_RECV:
/*
* App called destroy before/without calling accept after
- * receiving connection request event notification.
+ * receiving connection request event notification or
+ * returned non zero from the event callback function.
+ * In either case, must tell the provider to reject.
*/
cm_id_priv->state = IW_CM_STATE_DESTROYING;
break;
@@ -391,9 +396,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
wait_for_completion(&cm_id_priv->destroy_comp);
- dealloc_work_entries(cm_id_priv);
-
- kfree(cm_id_priv);
+ free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
/* Call the client CM handler */
ret = cm_id->cm_handler(cm_id, iw_event);
if (ret) {
+ iw_cm_reject(cm_id, NULL, 0);
set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
destroy_cm_id(cm_id);
if (atomic_read(&cm_id_priv->refcount)==0)
- kfree(cm_id);
+ free_cm_id(cm_id_priv);
}
out:
@@ -854,13 +858,12 @@ static void cm_work_handler(struct work_struct *_work)
destroy_cm_id(&cm_id_priv->id);
}
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- if (iwcm_deref_id(cm_id_priv))
- return;
-
- if (atomic_read(&cm_id_priv->refcount)==0 &&
- test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
- dealloc_work_entries(cm_id_priv);
- kfree(cm_id_priv);
+ if (iwcm_deref_id(cm_id_priv)) {
+ if (test_bit(IWCM_F_CALLBACK_DESTROY,
+ &cm_id_priv->flags)) {
+ BUG_ON(!list_empty(&cm_id_priv->work_list));
+ free_cm_id(cm_id_priv);
+ }
return;
}
spin_lock_irqsave(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 13efd417034..6edfecf1be7 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
*
@@ -31,7 +31,6 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#include <linux/dma-mapping.h>
#include <rdma/ib_cache.h>
@@ -668,7 +667,7 @@ static void build_smp_wc(struct ib_qp *qp,
static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_send_wr_private *mad_send_wr)
{
- int ret;
+ int ret = 0;
struct ib_smp *smp = mad_send_wr->send_buf.mad;
unsigned long flags;
struct ib_mad_local_private *local;
@@ -688,14 +687,15 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
*/
if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
IB_LID_PERMISSIVE &&
- !smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
+ smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
+ IB_SMI_DISCARD) {
ret = -EINVAL;
printk(KERN_ERR PFX "Invalid directed route\n");
goto out;
}
+
/* Check to post send on QP or process locally */
- ret = smi_check_local_smp(smp, device);
- if (!ret)
+ if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD)
goto out;
local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -1874,18 +1874,22 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
if (recv->mad.mad.mad_hdr.mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- if (!smi_handle_dr_smp_recv(&recv->mad.smp,
- port_priv->device->node_type,
- port_priv->port_num,
- port_priv->device->phys_port_cnt))
+ if (smi_handle_dr_smp_recv(&recv->mad.smp,
+ port_priv->device->node_type,
+ port_priv->port_num,
+ port_priv->device->phys_port_cnt) ==
+ IB_SMI_DISCARD)
goto out;
- if (!smi_check_forward_dr_smp(&recv->mad.smp))
+
+ if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL)
goto local;
- if (!smi_handle_dr_smp_send(&recv->mad.smp,
- port_priv->device->node_type,
- port_priv->port_num))
+
+ if (smi_handle_dr_smp_send(&recv->mad.smp,
+ port_priv->device->node_type,
+ port_priv->port_num) == IB_SMI_DISCARD)
goto out;
- if (!smi_check_local_smp(&recv->mad.smp, port_priv->device))
+
+ if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
goto out;
}
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
new file mode 100644
index 00000000000..4a579b3a1c9
--- /dev/null
+++ b/drivers/infiniband/core/multicast.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include <rdma/ib_cache.h>
+#include "sa.h"
+
+static void mcast_add_one(struct ib_device *device);
+static void mcast_remove_one(struct ib_device *device);
+
+static struct ib_client mcast_client = {
+ .name = "ib_multicast",
+ .add = mcast_add_one,
+ .remove = mcast_remove_one
+};
+
+static struct ib_sa_client sa_client;
+static struct workqueue_struct *mcast_wq;
+static union ib_gid mgid0;
+
+struct mcast_device;
+
+struct mcast_port {
+ struct mcast_device *dev;
+ spinlock_t lock;
+ struct rb_root table;
+ atomic_t refcount;
+ struct completion comp;
+ u8 port_num;
+};
+
+struct mcast_device {
+ struct ib_device *device;
+ struct ib_event_handler event_handler;
+ int start_port;
+ int end_port;
+ struct mcast_port port[0];
+};
+
+enum mcast_state {
+ MCAST_IDLE,
+ MCAST_JOINING,
+ MCAST_MEMBER,
+ MCAST_BUSY,
+ MCAST_ERROR
+};
+
+struct mcast_member;
+
+struct mcast_group {
+ struct ib_sa_mcmember_rec rec;
+ struct rb_node node;
+ struct mcast_port *port;
+ spinlock_t lock;
+ struct work_struct work;
+ struct list_head pending_list;
+ struct list_head active_list;
+ struct mcast_member *last_join;
+ int members[3];
+ atomic_t refcount;
+ enum mcast_state state;
+ struct ib_sa_query *query;
+ int query_id;
+};
+
+struct mcast_member {
+ struct ib_sa_multicast multicast;
+ struct ib_sa_client *client;
+ struct mcast_group *group;
+ struct list_head list;
+ enum mcast_state state;
+ atomic_t refcount;
+ struct completion comp;
+};
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context);
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context);
+
+static struct mcast_group *mcast_find(struct mcast_port *port,
+ union ib_gid *mgid)
+{
+ struct rb_node *node = port->table.rb_node;
+ struct mcast_group *group;
+ int ret;
+
+ while (node) {
+ group = rb_entry(node, struct mcast_group, node);
+ ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+ if (!ret)
+ return group;
+
+ if (ret < 0)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
+static struct mcast_group *mcast_insert(struct mcast_port *port,
+ struct mcast_group *group,
+ int allow_duplicates)
+{
+ struct rb_node **link = &port->table.rb_node;
+ struct rb_node *parent = NULL;
+ struct mcast_group *cur_group;
+ int ret;
+
+ while (*link) {
+ parent = *link;
+ cur_group = rb_entry(parent, struct mcast_group, node);
+
+ ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+ sizeof group->rec.mgid);
+ if (ret < 0)
+ link = &(*link)->rb_left;
+ else if (ret > 0)
+ link = &(*link)->rb_right;
+ else if (allow_duplicates)
+ link = &(*link)->rb_left;
+ else
+ return cur_group;
+ }
+ rb_link_node(&group->node, parent, link);
+ rb_insert_color(&group->node, &port->table);
+ return NULL;
+}
+
+static void deref_port(struct mcast_port *port)
+{
+ if (atomic_dec_and_test(&port->refcount))
+ complete(&port->comp);
+}
+
+static void release_group(struct mcast_group *group)
+{
+ struct mcast_port *port = group->port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ if (atomic_dec_and_test(&group->refcount)) {
+ rb_erase(&group->node, &port->table);
+ spin_unlock_irqrestore(&port->lock, flags);
+ kfree(group);
+ deref_port(port);
+ } else
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void deref_member(struct mcast_member *member)
+{
+ if (atomic_dec_and_test(&member->refcount))
+ complete(&member->comp);
+}
+
+static void queue_join(struct mcast_member *member)
+{
+ struct mcast_group *group = member->group;
+ unsigned long flags;
+
+ spin_lock_irqsave(&group->lock, flags);
+ list_add(&member->list, &group->pending_list);
+ if (group->state == MCAST_IDLE) {
+ group->state = MCAST_BUSY;
+ atomic_inc(&group->refcount);
+ queue_work(mcast_wq, &group->work);
+ }
+ spin_unlock_irqrestore(&group->lock, flags);
+}
+
+/*
+ * A multicast group has three types of members: full member, non member, and
+ * send only member. We need to keep track of the number of members of each
+ * type based on their join state. Adjust the number of members the belong to
+ * the specified join states.
+ */
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+ int i;
+
+ for (i = 0; i < 3; i++, join_state >>= 1)
+ if (join_state & 0x1)
+ group->members[i] += inc;
+}
+
+/*
+ * If a multicast group has zero members left for a particular join state, but
+ * the group is still a member with the SA, we need to leave that join state.
+ * Determine which join states we still belong to, but that do not have any
+ * active members.
+ */
+static u8 get_leave_state(struct mcast_group *group)
+{
+ u8 leave_state = 0;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (!group->members[i])
+ leave_state |= (0x1 << i);
+
+ return leave_state & group->rec.join_state;
+}
+
+static int check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 selector, u8 src_value, u8 dst_value)
+{
+ int err;
+
+ if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+ return 0;
+
+ switch (selector) {
+ case IB_SA_GT:
+ err = (src_value <= dst_value);
+ break;
+ case IB_SA_LT:
+ err = (src_value >= dst_value);
+ break;
+ case IB_SA_EQ:
+ err = (src_value != dst_value);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
+static int cmp_rec(struct ib_sa_mcmember_rec *src,
+ struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
+{
+ /* MGID must already match */
+
+ if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
+ memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+ return -EINVAL;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+ IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
+ src->mtu, dst->mtu))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+ src->traffic_class != dst->traffic_class)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+ return -EINVAL;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+ IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
+ src->rate, dst->rate))
+ return -EINVAL;
+ if (check_selector(comp_mask,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+ dst->packet_life_time_selector,
+ src->packet_life_time, dst->packet_life_time))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+ src->flow_label != dst->flow_label)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+ src->hop_limit != dst->hop_limit)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
+ return -EINVAL;
+
+ /* join_state checked separately, proxy_join ignored */
+
+ return 0;
+}
+
+static int send_join(struct mcast_group *group, struct mcast_member *member)
+{
+ struct mcast_port *port = group->port;
+ int ret;
+
+ group->last_join = member;
+ ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+ port->port_num, IB_MGMT_METHOD_SET,
+ &member->multicast.rec,
+ member->multicast.comp_mask,
+ 3000, GFP_KERNEL, join_handler, group,
+ &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static int send_leave(struct mcast_group *group, u8 leave_state)
+{
+ struct mcast_port *port = group->port;
+ struct ib_sa_mcmember_rec rec;
+ int ret;
+
+ rec = group->rec;
+ rec.join_state = leave_state;
+
+ ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+ port->port_num, IB_SA_METHOD_DELETE, &rec,
+ IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_JOIN_STATE,
+ 3000, GFP_KERNEL, leave_handler,
+ group, &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static void join_group(struct mcast_group *group, struct mcast_member *member,
+ u8 join_state)
+{
+ member->state = MCAST_MEMBER;
+ adjust_membership(group, join_state, 1);
+ group->rec.join_state |= join_state;
+ member->multicast.rec = group->rec;
+ member->multicast.rec.join_state = join_state;
+ list_move(&member->list, &group->active_list);
+}
+
+static int fail_join(struct mcast_group *group, struct mcast_member *member,
+ int status)
+{
+ spin_lock_irq(&group->lock);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ return member->multicast.callback(status, &member->multicast);
+}
+
+static void process_group_error(struct mcast_group *group)
+{
+ struct mcast_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->active_list)) {
+ member = list_entry(group->active_list.next,
+ struct mcast_member, list);
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ adjust_membership(group, member->multicast.rec.join_state, -1);
+ member->state = MCAST_ERROR;
+ spin_unlock_irq(&group->lock);
+
+ ret = member->multicast.callback(-ENETRESET,
+ &member->multicast);
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ spin_lock_irq(&group->lock);
+ }
+
+ group->rec.join_state = 0;
+ group->state = MCAST_BUSY;
+ spin_unlock_irq(&group->lock);
+}
+
+static void mcast_work_handler(struct work_struct *work)
+{
+ struct mcast_group *group;
+ struct mcast_member *member;
+ struct ib_sa_multicast *multicast;
+ int status, ret;
+ u8 join_state;
+
+ group = container_of(work, typeof(*group), work);
+retest:
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->pending_list) ||
+ (group->state == MCAST_ERROR)) {
+
+ if (group->state == MCAST_ERROR) {
+ spin_unlock_irq(&group->lock);
+ process_group_error(group);
+ goto retest;
+ }
+
+ member = list_entry(group->pending_list.next,
+ struct mcast_member, list);
+ multicast = &member->multicast;
+ join_state = multicast->rec.join_state;
+ atomic_inc(&member->refcount);
+
+ if (join_state == (group->rec.join_state & join_state)) {
+ status = cmp_rec(&group->rec, &multicast->rec,
+ multicast->comp_mask);
+ if (!status)
+ join_group(group, member, join_state);
+ else
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = multicast->callback(status, multicast);
+ } else {
+ spin_unlock_irq(&group->lock);
+ status = send_join(group, member);
+ if (!status) {
+ deref_member(member);
+ return;
+ }
+ ret = fail_join(group, member, status);
+ }
+
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ spin_lock_irq(&group->lock);
+ }
+
+ join_state = get_leave_state(group);
+ if (join_state) {
+ group->rec.join_state &= ~join_state;
+ spin_unlock_irq(&group->lock);
+ if (send_leave(group, join_state))
+ goto retest;
+ } else {
+ group->state = MCAST_IDLE;
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+}
+
+/*
+ * Fail a join request if it is still active - at the head of the pending queue.
+ */
+static void process_join_error(struct mcast_group *group, int status)
+{
+ struct mcast_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ member = list_entry(group->pending_list.next,
+ struct mcast_member, list);
+ if (group->last_join == member) {
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = member->multicast.callback(status, &member->multicast);
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ } else
+ spin_unlock_irq(&group->lock);
+}
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context)
+{
+ struct mcast_group *group = context;
+
+ if (status)
+ process_join_error(group, status);
+ else {
+ spin_lock_irq(&group->port->lock);
+ group->rec = *rec;
+ if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+ rb_erase(&group->node, &group->port->table);
+ mcast_insert(group->port, group, 1);
+ }
+ spin_unlock_irq(&group->port->lock);
+ }
+ mcast_work_handler(&group->work);
+}
+
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context)
+{
+ struct mcast_group *group = context;
+
+ mcast_work_handler(&group->work);
+}
+
+static struct mcast_group *acquire_group(struct mcast_port *port,
+ union ib_gid *mgid, gfp_t gfp_mask)
+{
+ struct mcast_group *group, *cur_group;
+ unsigned long flags;
+ int is_mgid0;
+
+ is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+ if (!is_mgid0) {
+ spin_lock_irqsave(&port->lock, flags);
+ group = mcast_find(port, mgid);
+ if (group)
+ goto found;
+ spin_unlock_irqrestore(&port->lock, flags);
+ }
+
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return NULL;
+
+ group->port = port;
+ group->rec.mgid = *mgid;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->active_list);
+ INIT_WORK(&group->work, mcast_work_handler);
+ spin_lock_init(&group->lock);
+
+ spin_lock_irqsave(&port->lock, flags);
+ cur_group = mcast_insert(port, group, is_mgid0);
+ if (cur_group) {
+ kfree(group);
+ group = cur_group;
+ } else
+ atomic_inc(&port->refcount);
+found:
+ atomic_inc(&group->refcount);
+ spin_unlock_irqrestore(&port->lock, flags);
+ return group;
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier. Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_sa_multicast *
+ib_sa_join_multicast(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_sa_multicast *multicast),
+ void *context)
+{
+ struct mcast_device *dev;
+ struct mcast_member *member;
+ struct ib_sa_multicast *multicast;
+ int ret;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ member = kmalloc(sizeof *member, gfp_mask);
+ if (!member)
+ return ERR_PTR(-ENOMEM);
+
+ ib_sa_client_get(client);
+ member->client = client;
+ member->multicast.rec = *rec;
+ member->multicast.comp_mask = comp_mask;
+ member->multicast.callback = callback;
+ member->multicast.context = context;
+ init_completion(&member->comp);
+ atomic_set(&member->refcount, 1);
+ member->state = MCAST_JOINING;
+
+ member->group = acquire_group(&dev->port[port_num - dev->start_port],
+ &rec->mgid, gfp_mask);
+ if (!member->group) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * The user will get the multicast structure in their callback. They
+ * could then free the multicast structure before we can return from
+ * this routine. So we save the pointer to return before queuing
+ * any callback.
+ */
+ multicast = &member->multicast;
+ queue_join(member);
+ return multicast;
+
+err:
+ ib_sa_client_put(client);
+ kfree(member);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_sa_join_multicast);
+
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+{
+ struct mcast_member *member;
+ struct mcast_group *group;
+
+ member = container_of(multicast, struct mcast_member, multicast);
+ group = member->group;
+
+ spin_lock_irq(&group->lock);
+ if (member->state == MCAST_MEMBER)
+ adjust_membership(group, multicast->rec.join_state, -1);
+
+ list_del_init(&member->list);
+
+ if (group->state == MCAST_IDLE) {
+ group->state = MCAST_BUSY;
+ spin_unlock_irq(&group->lock);
+ /* Continue to hold reference on group until callback */
+ queue_work(mcast_wq, &group->work);
+ } else {
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+
+ deref_member(member);
+ wait_for_completion(&member->comp);
+ ib_sa_client_put(member->client);
+ kfree(member);
+}
+EXPORT_SYMBOL(ib_sa_free_multicast);
+
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+ union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ struct mcast_group *group;
+ unsigned long flags;
+ int ret = 0;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return -ENODEV;
+
+ port = &dev->port[port_num - dev->start_port];
+ spin_lock_irqsave(&port->lock, flags);
+ group = mcast_find(port, mgid);
+ if (group)
+ *rec = group->rec;
+ else
+ ret = -EADDRNOTAVAIL;
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
+
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ struct ib_ah_attr *ah_attr)
+{
+ int ret;
+ u16 gid_index;
+ u8 p;
+
+ ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+ if (ret)
+ return ret;
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = be16_to_cpu(rec->mlid);
+ ah_attr->sl = rec->sl;
+ ah_attr->port_num = port_num;
+ ah_attr->static_rate = rec->rate;
+
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.dgid = rec->mgid;
+
+ ah_attr->grh.sgid_index = (u8) gid_index;
+ ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+ ah_attr->grh.hop_limit = rec->hop_limit;
+ ah_attr->grh.traffic_class = rec->traffic_class;
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_mcmember);
+
+static void mcast_groups_lost(struct mcast_port *port)
+{
+ struct mcast_group *group;
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ for (node = rb_first(&port->table); node; node = rb_next(node)) {
+ group = rb_entry(node, struct mcast_group, node);
+ spin_lock(&group->lock);
+ if (group->state == MCAST_IDLE) {
+ atomic_inc(&group->refcount);
+ queue_work(mcast_wq, &group->work);
+ }
+ group->state = MCAST_ERROR;
+ spin_unlock(&group->lock);
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mcast_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct mcast_device *dev;
+
+ dev = container_of(handler, struct mcast_device, event_handler);
+
+ switch (event->event) {
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_LID_CHANGE:
+ case IB_EVENT_SM_CHANGE:
+ case IB_EVENT_CLIENT_REREGISTER:
+ mcast_groups_lost(&dev->port[event->element.port_num -
+ dev->start_port]);
+ break;
+ default:
+ break;
+ }
+}
+
+static void mcast_add_one(struct ib_device *device)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ int i;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+ GFP_KERNEL);
+ if (!dev)
+ return;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ dev->start_port = dev->end_port = 0;
+ else {
+ dev->start_port = 1;
+ dev->end_port = device->phys_port_cnt;
+ }
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ port->dev = dev;
+ port->port_num = dev->start_port + i;
+ spin_lock_init(&port->lock);
+ port->table = RB_ROOT;
+ init_completion(&port->comp);
+ atomic_set(&port->refcount, 1);
+ }
+
+ dev->device = device;
+ ib_set_client_data(device, &mcast_client, dev);
+
+ INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
+ ib_register_event_handler(&dev->event_handler);
+}
+
+static void mcast_remove_one(struct ib_device *device)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ int i;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return;
+
+ ib_unregister_event_handler(&dev->event_handler);
+ flush_workqueue(mcast_wq);
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ deref_port(port);
+ wait_for_completion(&port->comp);
+ }
+
+ kfree(dev);
+}
+
+int mcast_init(void)
+{
+ int ret;
+
+ mcast_wq = create_singlethread_workqueue("ib_mcast");
+ if (!mcast_wq)
+ return -ENOMEM;
+
+ ib_sa_register_client(&sa_client);
+
+ ret = ib_register_client(&mcast_client);
+ if (ret)
+ goto err;
+ return 0;
+
+err:
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(mcast_wq);
+ return ret;
+}
+
+void mcast_cleanup(void)
+{
+ ib_unregister_client(&mcast_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(mcast_wq);
+}
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
new file mode 100644
index 00000000000..24c93fd320f
--- /dev/null
+++ b/drivers/infiniband/core/sa.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef SA_H
+#define SA_H
+
+#include <rdma/ib_sa.h>
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+ atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+ if (atomic_dec_and_test(&client->users))
+ complete(&client->comp);
+}
+
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u8 method,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_mcmember_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+int mcast_init(void);
+void mcast_cleanup(void);
+
+#endif /* SA_H */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index e45afba7534..9a7eaadb168 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -47,8 +47,8 @@
#include <linux/workqueue.h>
#include <rdma/ib_pack.h>
-#include <rdma/ib_sa.h>
#include <rdma/ib_cache.h>
+#include "sa.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -57,6 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
struct ib_sa_sm_ah {
struct ib_ah *ah;
struct kref ref;
+ u8 src_path_mask;
};
struct ib_sa_port {
@@ -380,6 +381,7 @@ static void update_sm_ah(struct work_struct *work)
}
kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
memset(&ah_attr, 0, sizeof ah_attr);
ah_attr.dlid = port_attr.sm_lid;
@@ -425,17 +427,6 @@ void ib_sa_register_client(struct ib_sa_client *client)
}
EXPORT_SYMBOL(ib_sa_register_client);
-static inline void ib_sa_client_get(struct ib_sa_client *client)
-{
- atomic_inc(&client->users);
-}
-
-static inline void ib_sa_client_put(struct ib_sa_client *client)
-{
- if (atomic_dec_and_test(&client->users))
- complete(&client->comp);
-}
-
void ib_sa_unregister_client(struct ib_sa_client *client)
{
ib_sa_client_put(client);
@@ -471,6 +462,25 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
}
EXPORT_SYMBOL(ib_sa_cancel_query);
+static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+{
+ struct ib_sa_device *sa_dev;
+ struct ib_sa_port *port;
+ unsigned long flags;
+ u8 src_path_mask;
+
+ sa_dev = ib_get_client_data(device, &sa_client);
+ if (!sa_dev)
+ return 0x7f;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->ah_lock, flags);
+ src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ return src_path_mask;
+}
+
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
@@ -480,8 +490,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
ah_attr->sl = rec->sl;
- ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
+ ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
+ get_src_path_mask(device, port_num);
ah_attr->port_num = port_num;
+ ah_attr->static_rate = rec->rate;
if (rec->hop_limit > 1) {
ah_attr->ah_flags = IB_AH_GRH;
@@ -901,7 +913,6 @@ err1:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1053,14 +1064,27 @@ static int __init ib_sa_init(void)
get_random_bytes(&tid, sizeof tid);
ret = ib_register_client(&sa_client);
- if (ret)
+ if (ret) {
printk(KERN_ERR "Couldn't register ib_sa client\n");
+ goto err1;
+ }
+
+ ret = mcast_init();
+ if (ret) {
+ printk(KERN_ERR "Couldn't initialize multicast handling\n");
+ goto err2;
+ }
+ return 0;
+err2:
+ ib_unregister_client(&sa_client);
+err1:
return ret;
}
static void __exit ib_sa_cleanup(void)
{
+ mcast_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 54b81e17ad5..2bca753eb62 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -3,7 +3,7 @@
* Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -34,7 +34,6 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $
*/
#include <rdma/ib_smi.h>
@@ -44,9 +43,8 @@
* Fixup a directed route SMP for sending
* Return 0 if the SMP should be discarded
*/
-int smi_handle_dr_smp_send(struct ib_smp *smp,
- u8 node_type,
- int port_num)
+enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
+ u8 node_type, int port_num)
{
u8 hop_ptr, hop_cnt;
@@ -59,18 +57,18 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
if (hop_cnt && hop_ptr == 0) {
smp->hop_ptr++;
return (smp->initial_path[smp->hop_ptr] ==
- port_num);
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:2 */
if (hop_ptr && hop_ptr < hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
- return 0;
+ return IB_SMI_DISCARD;
/* smp->return_path set when received */
smp->hop_ptr++;
return (smp->initial_path[smp->hop_ptr] ==
- port_num);
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:3 -- We're at the end of the DR segment of path */
@@ -78,29 +76,30 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
/* smp->return_path set when received */
smp->hop_ptr++;
return (node_type == RDMA_NODE_IB_SWITCH ||
- smp->dr_dlid == IB_LID_PERMISSIVE);
+ smp->dr_dlid == IB_LID_PERMISSIVE ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
/* C14-9:5 -- Fail unreasonable hop pointer */
- return (hop_ptr == hop_cnt + 1);
+ return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
} else {
/* C14-13:1 */
if (hop_cnt && hop_ptr == hop_cnt + 1) {
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
- port_num);
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:2 */
if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
- return 0;
+ return IB_SMI_DISCARD;
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
- port_num);
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:3 -- at the end of the DR segment of path */
@@ -108,15 +107,16 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
smp->hop_ptr--;
/* C14-13:3 -- SMPs destined for SM shouldn't be here */
return (node_type == RDMA_NODE_IB_SWITCH ||
- smp->dr_slid == IB_LID_PERMISSIVE);
+ smp->dr_slid == IB_LID_PERMISSIVE ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
if (hop_ptr == 0)
- return 1;
+ return IB_SMI_HANDLE;
/* C14-13:5 -- Check for unreasonable hop pointer */
- return 0;
+ return IB_SMI_DISCARD;
}
}
@@ -124,10 +124,8 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
* Adjust information for a received SMP
* Return 0 if the SMP should be dropped
*/
-int smi_handle_dr_smp_recv(struct ib_smp *smp,
- u8 node_type,
- int port_num,
- int phys_port_cnt)
+enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
+ int port_num, int phys_port_cnt)
{
u8 hop_ptr, hop_cnt;
@@ -138,16 +136,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 -- sender should have incremented hop_ptr */
if (hop_cnt && hop_ptr == 0)
- return 0;
+ return IB_SMI_DISCARD;
/* C14-9:2 -- intermediate hop */
if (hop_ptr && hop_ptr < hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
- return 0;
+ return IB_SMI_DISCARD;
smp->return_path[hop_ptr] = port_num;
/* smp->hop_ptr updated when sending */
- return (smp->initial_path[hop_ptr+1] <= phys_port_cnt);
+ return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:3 -- We're at the end of the DR segment of path */
@@ -157,12 +156,13 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
/* smp->hop_ptr updated when sending */
return (node_type == RDMA_NODE_IB_SWITCH ||
- smp->dr_dlid == IB_LID_PERMISSIVE);
+ smp->dr_dlid == IB_LID_PERMISSIVE ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
/* C14-9:5 -- fail unreasonable hop pointer */
- return (hop_ptr == hop_cnt + 1);
+ return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
} else {
@@ -170,16 +170,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
if (hop_cnt && hop_ptr == hop_cnt + 1) {
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
- port_num);
+ port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:2 */
if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
- return 0;
+ return IB_SMI_DISCARD;
/* smp->hop_ptr updated when sending */
- return (smp->return_path[hop_ptr-1] <= phys_port_cnt);
+ return (smp->return_path[hop_ptr-1] <= phys_port_cnt ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:3 -- We're at the end of the DR segment of path */
@@ -187,23 +188,20 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
if (smp->dr_slid == IB_LID_PERMISSIVE) {
/* giving SMP to SM - update hop_ptr */
smp->hop_ptr--;
- return 1;
+ return IB_SMI_HANDLE;
}
/* smp->hop_ptr updated when sending */
- return (node_type == RDMA_NODE_IB_SWITCH);
+ return (node_type == RDMA_NODE_IB_SWITCH ?
+ IB_SMI_HANDLE: IB_SMI_DISCARD);
}
/* C14-13:4 -- hop_ptr = 0 -> give to SM */
/* C14-13:5 -- Check for unreasonable hop pointer */
- return (hop_ptr == 0);
+ return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
}
-/*
- * Return 1 if the received DR SMP should be forwarded to the send queue
- * Return 0 if the SMP should be completed up the stack
- */
-int smi_check_forward_dr_smp(struct ib_smp *smp)
+enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
{
u8 hop_ptr, hop_cnt;
@@ -213,23 +211,25 @@ int smi_check_forward_dr_smp(struct ib_smp *smp)
if (!ib_get_smp_direction(smp)) {
/* C14-9:2 -- intermediate hop */
if (hop_ptr && hop_ptr < hop_cnt)
- return 1;
+ return IB_SMI_SEND;
/* C14-9:3 -- at the end of the DR segment of path */
if (hop_ptr == hop_cnt)
- return (smp->dr_dlid == IB_LID_PERMISSIVE);
+ return (smp->dr_dlid == IB_LID_PERMISSIVE ?
+ IB_SMI_SEND : IB_SMI_LOCAL);
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
if (hop_ptr == hop_cnt + 1)
- return 1;
+ return IB_SMI_SEND;
} else {
- /* C14-13:2 */
+ /* C14-13:2 -- intermediate hop */
if (2 <= hop_ptr && hop_ptr <= hop_cnt)
- return 1;
+ return IB_SMI_SEND;
/* C14-13:3 -- at the end of the DR segment of path */
if (hop_ptr == 1)
- return (smp->dr_slid != IB_LID_PERMISSIVE);
+ return (smp->dr_slid != IB_LID_PERMISSIVE ?
+ IB_SMI_SEND : IB_SMI_LOCAL);
}
- return 0;
+ return IB_SMI_LOCAL;
}
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 3011bfd86dc..9a4b349efc3 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -3,7 +3,7 @@
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
- * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,7 +33,6 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: smi.h 1389 2004-12-27 22:56:47Z roland $
*/
#ifndef __SMI_H_
@@ -41,26 +40,33 @@
#include <rdma/ib_smi.h>
-int smi_handle_dr_smp_recv(struct ib_smp *smp,
- u8 node_type,
- int port_num,
- int phys_port_cnt);
-extern int smi_check_forward_dr_smp(struct ib_smp *smp);
-extern int smi_handle_dr_smp_send(struct ib_smp *smp,
- u8 node_type,
- int port_num);
+enum smi_action {
+ IB_SMI_DISCARD,
+ IB_SMI_HANDLE
+};
+
+enum smi_forward_action {
+ IB_SMI_LOCAL, /* SMP should be completed up the stack */
+ IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */
+};
+
+enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
+ int port_num, int phys_port_cnt);
+extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
+extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
+ u8 node_type, int port_num);
/*
* Return 1 if the SMP should be handled by the local SMA/SM via process_mad
*/
-static inline int smi_check_local_smp(struct ib_smp *smp,
- struct ib_device *device)
+static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
+ struct ib_device *device)
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
return ((device->process_mad &&
!ib_get_smp_direction(smp) &&
- (smp->hop_ptr == smp->hop_cnt + 1)));
+ (smp->hop_ptr == smp->hop_cnt + 1)) ?
+ IB_SMI_HANDLE : IB_SMI_DISCARD);
}
-
#endif /* __SMI_H_ */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 709323c14c5..08c299ebf4a 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -683,6 +683,7 @@ int ib_device_register_sysfs(struct ib_device *device)
class_dev->class = &ib_class;
class_dev->class_data = device;
+ class_dev->dev = device->dma_device;
strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE);
INIT_LIST_HEAD(&device->port_list);
@@ -714,8 +715,6 @@ int ib_device_register_sysfs(struct ib_device *device)
if (ret)
goto err_put;
} else {
- int i;
-
for (i = 1; i <= device->phys_port_cnt; ++i) {
ret = add_port(device, i);
if (ret)
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index ee51d79a7ad..2586a3ee8eb 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -407,29 +407,18 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
mutex_lock(&file->file_mutex);
while (list_empty(&file->events)) {
+ mutex_unlock(&file->file_mutex);
- if (file->filp->f_flags & O_NONBLOCK) {
- result = -EAGAIN;
- break;
- }
+ if (file->filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
- if (signal_pending(current)) {
- result = -ERESTARTSYS;
- break;
- }
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->events)))
+ return -ERESTARTSYS;
- prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
-
- mutex_unlock(&file->file_mutex);
- schedule();
mutex_lock(&file->file_mutex);
-
- finish_wait(&file->poll_wait, &wait);
}
- if (result)
- goto done;
-
uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
if (ib_ucm_new_cm_id(uevent->resp.event)) {
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 6b81b98961c..53b4c94a7eb 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -70,10 +70,24 @@ struct ucma_context {
u64 uid;
struct list_head list;
+ struct list_head mc_list;
+};
+
+struct ucma_multicast {
+ struct ucma_context *ctx;
+ int id;
+ int events_reported;
+
+ u64 uid;
+ struct list_head list;
+ struct sockaddr addr;
+ u8 pad[sizeof(struct sockaddr_in6) -
+ sizeof(struct sockaddr)];
};
struct ucma_event {
struct ucma_context *ctx;
+ struct ucma_multicast *mc;
struct list_head list;
struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
@@ -81,6 +95,7 @@ struct ucma_event {
static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
+static DEFINE_IDR(multicast_idr);
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
@@ -124,6 +139,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
atomic_set(&ctx->ref, 1);
init_completion(&ctx->comp);
+ INIT_LIST_HEAD(&ctx->mc_list);
ctx->file = file;
do {
@@ -147,6 +163,37 @@ error:
return NULL;
}
+static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+{
+ struct ucma_multicast *mc;
+ int ret;
+
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
+ if (!mc)
+ return NULL;
+
+ do {
+ ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
+ if (!ret)
+ goto error;
+
+ mutex_lock(&mut);
+ ret = idr_get_new(&multicast_idr, mc, &mc->id);
+ mutex_unlock(&mut);
+ } while (ret == -EAGAIN);
+
+ if (ret)
+ goto error;
+
+ mc->ctx = ctx;
+ list_add_tail(&mc->list, &ctx->mc_list);
+ return mc;
+
+error:
+ kfree(mc);
+ return NULL;
+}
+
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
struct rdma_conn_param *src)
{
@@ -180,8 +227,19 @@ static void ucma_set_event_context(struct ucma_context *ctx,
struct ucma_event *uevent)
{
uevent->ctx = ctx;
- uevent->resp.uid = ctx->uid;
- uevent->resp.id = ctx->id;
+ switch (event->event) {
+ case RDMA_CM_EVENT_MULTICAST_JOIN:
+ case RDMA_CM_EVENT_MULTICAST_ERROR:
+ uevent->mc = (struct ucma_multicast *)
+ event->param.ud.private_data;
+ uevent->resp.uid = uevent->mc->uid;
+ uevent->resp.id = uevent->mc->id;
+ break;
+ default:
+ uevent->resp.uid = ctx->uid;
+ uevent->resp.id = ctx->id;
+ break;
+ }
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -199,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
ucma_set_event_context(ctx, event, uevent);
uevent->resp.event = event->event;
uevent->resp.status = event->status;
- if (cm_id->ps == RDMA_PS_UDP)
+ if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
else
ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -208,7 +266,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
mutex_lock(&ctx->file->mut);
if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
if (!ctx->backlog) {
- ret = -EDQUOT;
+ ret = -ENOMEM;
kfree(uevent);
goto out;
}
@@ -248,26 +306,18 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
mutex_lock(&file->mut);
while (list_empty(&file->event_list)) {
- if (file->filp->f_flags & O_NONBLOCK) {
- ret = -EAGAIN;
- break;
- }
+ mutex_unlock(&file->mut);
- if (signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
+ if (file->filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->event_list)))
+ return -ERESTARTSYS;
- prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
- mutex_unlock(&file->mut);
- schedule();
mutex_lock(&file->mut);
- finish_wait(&file->poll_wait, &wait);
}
- if (ret)
- goto done;
-
uevent = list_entry(file->event_list.next, struct ucma_event, list);
if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
@@ -290,6 +340,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
list_del(&uevent->list);
uevent->ctx->events_reported++;
+ if (uevent->mc)
+ uevent->mc->events_reported++;
kfree(uevent);
done:
mutex_unlock(&file->mut);
@@ -342,6 +394,19 @@ err1:
return ret;
}
+static void ucma_cleanup_multicast(struct ucma_context *ctx)
+{
+ struct ucma_multicast *mc, *tmp;
+
+ mutex_lock(&mut);
+ list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+ list_del(&mc->list);
+ idr_remove(&multicast_idr, mc->id);
+ kfree(mc);
+ }
+ mutex_unlock(&mut);
+}
+
static void ucma_cleanup_events(struct ucma_context *ctx)
{
struct ucma_event *uevent, *tmp;
@@ -360,6 +425,19 @@ static void ucma_cleanup_events(struct ucma_context *ctx)
}
}
+static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
+{
+ struct ucma_event *uevent, *tmp;
+
+ list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
+ if (uevent->mc != mc)
+ continue;
+
+ list_del(&uevent->list);
+ kfree(uevent);
+ }
+}
+
static int ucma_free_ctx(struct ucma_context *ctx)
{
int events_reported;
@@ -367,6 +445,8 @@ static int ucma_free_ctx(struct ucma_context *ctx)
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
+ ucma_cleanup_multicast(ctx);
+
/* Cleanup events not yet reported to the user. */
mutex_lock(&ctx->file->mut);
ucma_cleanup_events(ctx);
@@ -731,6 +811,114 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
return ret;
}
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_join_mcast cmd;
+ struct rdma_ucm_create_id_resp resp;
+ struct ucma_context *ctx;
+ struct ucma_multicast *mc;
+ int ret;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ mutex_lock(&file->mut);
+ mc = ucma_alloc_multicast(ctx);
+ if (IS_ERR(mc)) {
+ ret = PTR_ERR(mc);
+ goto err1;
+ }
+
+ mc->uid = cmd.uid;
+ memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+ ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc);
+ if (ret)
+ goto err2;
+
+ resp.id = mc->id;
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp))) {
+ ret = -EFAULT;
+ goto err3;
+ }
+
+ mutex_unlock(&file->mut);
+ ucma_put_ctx(ctx);
+ return 0;
+
+err3:
+ rdma_leave_multicast(ctx->cm_id, &mc->addr);
+ ucma_cleanup_mc_events(mc);
+err2:
+ mutex_lock(&mut);
+ idr_remove(&multicast_idr, mc->id);
+ mutex_unlock(&mut);
+ list_del(&mc->list);
+ kfree(mc);
+err1:
+ mutex_unlock(&file->mut);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_leave_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_destroy_id cmd;
+ struct rdma_ucm_destroy_id_resp resp;
+ struct ucma_multicast *mc;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&mut);
+ mc = idr_find(&multicast_idr, cmd.id);
+ if (!mc)
+ mc = ERR_PTR(-ENOENT);
+ else if (mc->ctx->file != file)
+ mc = ERR_PTR(-EINVAL);
+ else {
+ idr_remove(&multicast_idr, mc->id);
+ atomic_inc(&mc->ctx->ref);
+ }
+ mutex_unlock(&mut);
+
+ if (IS_ERR(mc)) {
+ ret = PTR_ERR(mc);
+ goto out;
+ }
+
+ rdma_leave_multicast(mc->ctx->cm_id, &mc->addr);
+ mutex_lock(&mc->ctx->file->mut);
+ ucma_cleanup_mc_events(mc);
+ list_del(&mc->list);
+ mutex_unlock(&mc->ctx->file->mut);
+
+ ucma_put_ctx(mc->ctx);
+ resp.events_reported = mc->events_reported;
+ kfree(mc);
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+out:
+ return ret;
+}
+
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len) = {
@@ -750,6 +938,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
[RDMA_USER_CM_CMD_GET_OPTION] = NULL,
[RDMA_USER_CM_CMD_SET_OPTION] = NULL,
[RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
+ [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c069ebeba8e..8199b83052a 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -135,7 +135,7 @@ static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
static DEFINE_SPINLOCK(port_lock);
static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
-static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
+static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
@@ -231,12 +231,17 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
- /* XXX parse GRH */
- packet->mad.hdr.gid_index = 0;
- packet->mad.hdr.hop_limit = 0;
- packet->mad.hdr.traffic_class = 0;
- memset(packet->mad.hdr.gid, 0, 16);
- packet->mad.hdr.flow_label = 0;
+ struct ib_ah_attr ah_attr;
+
+ ib_init_ah_from_wc(agent->device, agent->port_num,
+ mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+
+ packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
+ packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
+ packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
+ memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
}
if (queue_packet(file, agent, packet))
@@ -473,6 +478,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
if (packet->mad.hdr.grh_present) {
ah_attr.ah_flags = IB_AH_GRH;
memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
+ ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index df1efbc1088..4fd75afa6a3 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -622,8 +622,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
obj->umem.virt_base = cmd.hca_va;
pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- if (!pd)
+ if (!pd) {
+ ret = -EINVAL;
goto err_release;
+ }
mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
if (IS_ERR(mr)) {
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 8b5dd3649bb..ccdf93d30b0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -167,7 +167,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = grh->hop_limit;
+ ah_attr->grh.hop_limit = 0xFF;
ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
}
return 0;
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 59243d9aedd..58bc272bd40 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
}
/* Setup the skb for reuse since we're dropping this pkt */
- elem->skb->tail = elem->skb->data = elem->skb->head;
+ elem->skb->data = elem->skb->head;
+ skb_reset_tail_pointer(elem->skb);
/* Zero out the rxp hdr in the sk_buff */
memset(elem->skb->data, 0, sizeof(*rxp_hdr));
@@ -521,9 +522,8 @@ static void c2_rx_interrupt(struct net_device *netdev)
* "sizeof(struct c2_rxp_hdr)".
*/
skb->data += sizeof(*rxp_hdr);
- skb->tail = skb->data + buflen;
+ skb_set_tail_pointer(skb, buflen);
skb->len = buflen;
- skb->dev = netdev;
skb->protocol = eth_type_trans(skb, netdev);
netif_rx(skb);
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index fef97275291..607c09bf764 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -796,7 +796,6 @@ int c2_register_device(struct c2_dev *dev)
memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.dma_device = &dev->pcidev->dev;
- dev->ibdev.class_dev.dev = &dev->pcidev->dev;
dev->ibdev.query_device = c2_query_device;
dev->ibdev.query_port = c2_query_port;
dev->ibdev.modify_port = c2_modify_port;
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
index 0e110f32f12..36b98989b15 100644
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -8,5 +8,4 @@ iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
EXTRA_CFLAGS += -DDEBUG
-iw_cxgb3-y += cxio_dbg.o
endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 5a7306f5efa..75f7b16a271 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 82fa7204198..f5e9aeec6f6 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -37,6 +36,7 @@
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
+#include <linux/dma-mapping.h>
#include "cxio_resource.h"
#include "cxio_hal.h"
@@ -46,7 +46,7 @@
static LIST_HEAD(rdev_list);
static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
-static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
+static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
{
struct cxio_rdev *rdev;
@@ -56,8 +56,7 @@ static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
return NULL;
}
-static inline struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev
- *tdev)
+static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
{
struct cxio_rdev *rdev;
@@ -119,7 +118,7 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
return 0;
}
-static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
+static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
{
struct rdma_cq_setup setup;
setup.id = cqid;
@@ -131,7 +130,7 @@ static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
}
-int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
+static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
{
u64 sge_cmd;
struct t3_modify_qp_wr *wqe;
@@ -426,7 +425,7 @@ void cxio_flush_hw_cq(struct t3_cq *cq)
}
}
-static inline int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
+static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
{
if (CQE_OPCODE(*cqe) == T3_TERMINATE)
return 0;
@@ -499,9 +498,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
u64 sge_cmd, ctx0, ctx1;
u64 base_addr;
struct t3_modify_qp_wr *wqe;
- struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
-
+ struct sk_buff *skb;
+ skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
PDBG("%s alloc_skb failed\n", __FUNCTION__);
return -ENOMEM;
@@ -509,7 +508,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
err = cxio_hal_init_ctrl_cq(rdev_p);
if (err) {
PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err);
- return err;
+ goto err;
}
rdev_p->ctrl_qp.workq = dma_alloc_coherent(
&(rdev_p->rnic_info.pdev->dev),
@@ -519,7 +518,8 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
GFP_KERNEL);
if (!rdev_p->ctrl_qp.workq) {
PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err;
}
pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
rdev_p->ctrl_qp.dma_addr);
@@ -557,6 +557,9 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
skb->priority = CPL_PRIORITY_CONTROL;
return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+err:
+ kfree_skb(skb);
+ return err;
}
static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
@@ -761,17 +764,6 @@ ret:
return err;
}
-/* IN : stag key, pdid, pbl_size
- * Out: stag index, actaul pbl_size, and pbl_addr allocated.
- */
-int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr)
-{
- *stag = T3_STAG_UNSET;
- return (__cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
- perm, 0, 0ULL, 0, 0, NULL, pbl_size, pbl_addr));
-}
-
int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
@@ -1030,7 +1022,7 @@ void __exit cxio_hal_exit(void)
cxio_hal_destroy_rhdl_resource();
}
-static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
+static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
{
struct t3_swsq *sqp;
__u32 ptr = wq->sq_rptr;
@@ -1059,9 +1051,8 @@ static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
break;
}
-static inline void create_read_req_cqe(struct t3_wq *wq,
- struct t3_cqe *hw_cqe,
- struct t3_cqe *read_cqe)
+static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
+ struct t3_cqe *read_cqe)
{
read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
read_cqe->len = wq->oldest_read->read_len;
@@ -1074,7 +1065,7 @@ static inline void create_read_req_cqe(struct t3_wq *wq,
/*
* Return a ptr to the next read wr in the SWSQ or NULL.
*/
-static inline void advance_oldest_read(struct t3_wq *wq)
+static void advance_oldest_read(struct t3_wq *wq)
{
u32 rptr = wq->oldest_read - wq->sq + 1;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 1b97e80b878..99543d63470 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -144,7 +143,6 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
void cxio_rdev_close(struct cxio_rdev *rdev);
int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
enum t3_cq_opcode op, u32 credit);
-int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev, u32 qpid);
int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
@@ -155,8 +153,6 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
struct cxio_ucontext *uctx);
int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
-int cxio_allocate_stag(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr);
int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
u8 page_size, __be64 *pbl, u32 *pbl_size,
@@ -172,8 +168,6 @@ int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-u32 cxio_hal_get_rhdl(void);
-void cxio_hal_put_rhdl(u32 rhdl);
u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
int __init cxio_hal_init(void);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 997aa32cbf0..d3095ae5bc2 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -180,7 +179,7 @@ tpt_err:
/*
* returns 0 if no resource available
*/
-static inline u32 cxio_hal_get_resource(struct kfifo *fifo)
+static u32 cxio_hal_get_resource(struct kfifo *fifo)
{
u32 entry;
if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
@@ -189,21 +188,11 @@ static inline u32 cxio_hal_get_resource(struct kfifo *fifo)
return 0; /* fifo emptry */
}
-static inline void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
{
BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
}
-u32 cxio_hal_get_rhdl(void)
-{
- return cxio_hal_get_resource(rhdl_fifo);
-}
-
-void cxio_hal_put_rhdl(u32 rhdl)
-{
- cxio_hal_put_resource(rhdl_fifo, rhdl);
-}
-
u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
{
return cxio_hal_get_resource(rscp->tpt_fifo);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
index a6bbe8370d8..a2703a3d882 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 103fc42d697..90d7b8972cb 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 4611afa5222..0315c9d9fce 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 6517ef85026..caf4e6007a4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index a522b1baa3b..3b4b0acd707 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -210,8 +209,7 @@ static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
return state;
}
-static inline void __state_set(struct iwch_ep_common *epc,
- enum iwch_ep_state new)
+static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
epc->state = new;
}
@@ -307,8 +305,7 @@ static int status2errno(int status)
*/
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
{
- if (skb) {
- BUG_ON(skb_cloned(skb));
+ if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
skb_trim(skb, 0);
skb_get(skb);
} else {
@@ -480,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
BUG_ON(skb_cloned(skb));
mpalen = sizeof(*mpa) + ep->plen;
- if (skb->data + mpalen + sizeof(*req) > skb->end) {
+ if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
kfree_skb(skb);
skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
if (!skb) {
@@ -510,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
*/
skb_get(skb);
set_arp_failure_handler(skb, arp_failure_discard);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
len = skb->len;
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -562,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
skb_get(skb);
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, arp_failure_discard);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
req->wr_lo = htonl(V_WR_TID(ep->hwtid));
@@ -613,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
*/
skb_get(skb);
set_arp_failure_handler(skb, arp_failure_discard);
- skb->h.raw = skb->data;
+ skb_reset_transport_header(skb);
len = skb->len;
req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -824,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
/*
* copy the new data into our accumulation buffer.
*/
- memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+ skb->len);
ep->mpa_pkt_len += skb->len;
/*
@@ -943,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
/*
* Copy the new data into our accumulation buffer.
*/
- memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+ skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+ skb->len);
ep->mpa_pkt_len += skb->len;
/*
@@ -1417,6 +1416,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
wake_up(&ep->com.waitq);
break;
case FPDU_MODE:
+ start_ep_timer(ep);
__state_set(&ep->com, CLOSING);
attrs.next_state = IWCH_QP_STATE_CLOSING;
iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
@@ -1427,7 +1427,6 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
disconnect = 0;
break;
case CLOSING:
- start_ep_timer(ep);
__state_set(&ep->com, MORIBUND);
disconnect = 0;
break;
@@ -1460,7 +1459,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
/*
* Returns whether an ABORT_REQ_RSS message is a negative advice.
*/
-static inline int is_neg_adv_abort(unsigned int status)
+static int is_neg_adv_abort(unsigned int status)
{
return status == CPL_ERR_RTX_NEG_ADVICE ||
status == CPL_ERR_PERSIST_NEG_ADVICE;
@@ -1489,8 +1488,10 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
case CONNECTING:
break;
case MPA_REQ_WAIT:
+ stop_ep_timer(ep);
break;
case MPA_REQ_SENT:
+ stop_ep_timer(ep);
connect_reply_upcall(ep, -ECONNRESET);
break;
case MPA_REP_SENT:
@@ -1509,9 +1510,10 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
get_ep(&ep->com);
break;
case MORIBUND:
+ case CLOSING:
stop_ep_timer(ep);
+ /*FALLTHROUGH*/
case FPDU_MODE:
- case CLOSING:
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = IWCH_QP_STATE_ERROR;
ret = iwch_modify_qp(ep->com.qp->rhp,
@@ -1572,7 +1574,6 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
spin_lock_irqsave(&ep->com.lock, flags);
switch (ep->com.state) {
case CLOSING:
- start_ep_timer(ep);
__state_set(&ep->com, MORIBUND);
break;
case MORIBUND:
@@ -1588,6 +1589,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
__state_set(&ep->com, DEAD);
release = 1;
break;
+ case ABORTING:
+ break;
case DEAD:
default:
BUG_ON(1);
@@ -1618,7 +1621,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
PDBG("%s ep %p\n", __FUNCTION__, ep);
skb_pull(skb, sizeof(struct cpl_rdma_terminate));
PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
- memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len);
+ skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
+ skb->len);
ep->com.qp->attr.terminate_msg_len = skb->len;
ep->com.qp->attr.is_terminate_local = 0;
return CPL_RET_BUF_DONE;
@@ -1636,6 +1640,7 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
__FUNCTION__, ep->hwtid);
+ stop_ep_timer(ep);
attrs.next_state = IWCH_QP_STATE_ERROR;
iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
@@ -1660,6 +1665,7 @@ static void ep_timeout(unsigned long arg)
break;
case MPA_REQ_WAIT:
break;
+ case CLOSING:
case MORIBUND:
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = IWCH_QP_STATE_ERROR;
@@ -1688,12 +1694,11 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
return -ECONNRESET;
}
BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
- state_set(&ep->com, CLOSING);
if (mpa_rev == 0)
abort_connection(ep, NULL, GFP_KERNEL);
else {
err = send_mpa_reject(ep, pdata, pdata_len);
- err = send_halfclose(ep, GFP_KERNEL);
+ err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
}
return 0;
}
@@ -1958,11 +1963,11 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
+ start_ep_timer(ep);
ep->com.state = CLOSING;
close = 1;
break;
case CLOSING:
- start_ep_timer(ep);
ep->com.state = MORIBUND;
close = 1;
break;
@@ -2024,6 +2029,17 @@ static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return 0;
}
+static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
+
+ if (rpl->status != CPL_ERR_NONE) {
+ printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
+ "for tid %u\n", rpl->status, GET_TID(rpl));
+ }
+ return CPL_RET_BUF_DONE;
+}
+
int __init iwch_cm_init(void)
{
skb_queue_head_init(&rxq);
@@ -2051,6 +2067,7 @@ int __init iwch_cm_init(void)
t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
t3c_handlers[CPL_RDMA_TERMINATE] = sched;
t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
+ t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl;
/*
* These are the real handlers that are called from a
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 7c810d90427..0c6f281bd4a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 98b3bdb5de9..d7624c170ee 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index a6efa8fe15d..b40676662a8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -48,12 +47,6 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
struct iwch_qp_attributes attrs;
struct iwch_qp *qhp;
- printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
- "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
- CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
- CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-
spin_lock(&rnicp->lock);
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@@ -74,6 +67,12 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
return;
}
+ printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+ CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+ CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+
atomic_inc(&qhp->refcnt);
spin_unlock(&rnicp->lock);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 2b6cd53bb3f..a6c2c4ba29e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6861087d776..af28a317016 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -332,6 +331,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
int ret = 0;
struct iwch_mm_entry *mm;
struct iwch_ucontext *ucontext;
+ u64 addr;
PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff,
key, len);
@@ -346,10 +346,11 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
mm = remove_mmap(ucontext, key, len);
if (!mm)
return -EINVAL;
+ addr = mm->addr;
kfree(mm);
- if ((mm->addr >= rdev_p->rnic_info.udbell_physbase) &&
- (mm->addr < (rdev_p->rnic_info.udbell_physbase +
+ if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
+ (addr < (rdev_p->rnic_info.udbell_physbase +
rdev_p->rnic_info.udbell_len))) {
/*
@@ -363,7 +364,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
vma->vm_flags &= ~VM_MAYREAD;
ret = io_remap_pfn_range(vma, vma->vm_start,
- mm->addr >> PAGE_SHIFT,
+ addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
} else {
@@ -371,7 +372,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
* Map WQ or CQ contig dma memory...
*/
ret = remap_pfn_range(vma, vma->vm_start,
- mm->addr >> PAGE_SHIFT,
+ addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
}
@@ -464,9 +465,6 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
php = to_iwch_pd(pd);
rhp = php->rhp;
- acc = iwch_convert_access(acc);
-
-
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
@@ -492,12 +490,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
- /* NOTE: TPT perms are backwards from BIND WR perms! */
- mhp->attr.perms = (acc & 0x1) << 3;
- mhp->attr.perms |= (acc & 0x2) << 1;
- mhp->attr.perms |= (acc & 0x4) >> 1;
- mhp->attr.perms |= (acc & 0x8) >> 3;
-
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = *iova_start;
mhp->attr.page_size = shift - 12;
@@ -526,7 +519,6 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
struct iwch_mr mh, *mhp;
struct iwch_pd *php;
struct iwch_dev *rhp;
- int new_acc;
__be64 *page_list = NULL;
int shift = 0;
u64 total_size;
@@ -547,19 +539,20 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
if (rhp != php->rhp)
return -EINVAL;
- new_acc = mhp->attr.perms;
-
memcpy(&mh, mhp, sizeof *mhp);
if (mr_rereg_mask & IB_MR_REREG_PD)
php = to_iwch_pd(pd);
if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mh.attr.perms = iwch_convert_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS)
+ mh.attr.perms = iwch_ib_to_tpt_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
ret = build_phys_page_list(buffer_list, num_phys_buf,
iova_start,
&total_size, &npages,
&shift, &page_list);
+ if (ret)
+ return ret;
+ }
ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
kfree(page_list);
@@ -569,7 +562,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
if (mr_rereg_mask & IB_MR_REREG_PD)
mhp->attr.pdid = php->pdid;
if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = acc;
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
if (mr_rereg_mask & IB_MR_REREG_TRANS) {
mhp->attr.zbva = 0;
mhp->attr.va_fbo = *iova_start;
@@ -614,8 +607,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto err;
}
- acc = iwch_convert_access(acc);
-
i = n = 0;
list_for_each_entry(chunk, &region->chunk_list, list)
@@ -631,10 +622,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
mhp->rhp = rhp;
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
- mhp->attr.perms = (acc & 0x1) << 3;
- mhp->attr.perms |= (acc & 0x2) << 1;
- mhp->attr.perms |= (acc & 0x4) >> 1;
- mhp->attr.perms |= (acc & 0x8) >> 3;
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = region->virt_base;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) region->length;
@@ -737,10 +725,8 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp)
qhp = to_iwch_qp(ib_qp);
rhp = qhp->rhp;
- if (qhp->attr.state == IWCH_QP_STATE_RTS) {
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
- }
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
wait_event(qhp->wait, !qhp->ep);
remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
@@ -949,7 +935,7 @@ void iwch_qp_rem_ref(struct ib_qp *qp)
wake_up(&(to_iwch_qp(qp)->wait));
}
-struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
+static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
{
PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn);
return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
@@ -1122,7 +1108,6 @@ int iwch_register_device(struct iwch_dev *dev)
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
- dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
dev->ibdev.modify_port = iwch_modify_port;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 61e3278fd7a..93bcc56756b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -179,7 +178,6 @@ static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
void iwch_qp_add_ref(struct ib_qp *qp);
void iwch_qp_rem_ref(struct ib_qp *qp);
-struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn);
struct iwch_ucontext {
struct ib_ucontext ibucontext;
@@ -288,27 +286,20 @@ static inline int iwch_convert_state(enum ib_qp_state ib_state)
}
}
-enum iwch_mem_perms {
- IWCH_MEM_ACCESS_LOCAL_READ = 1 << 0,
- IWCH_MEM_ACCESS_LOCAL_WRITE = 1 << 1,
- IWCH_MEM_ACCESS_REMOTE_READ = 1 << 2,
- IWCH_MEM_ACCESS_REMOTE_WRITE = 1 << 3,
- IWCH_MEM_ACCESS_ATOMICS = 1 << 4,
- IWCH_MEM_ACCESS_BINDING = 1 << 5,
- IWCH_MEM_ACCESS_LOCAL =
- (IWCH_MEM_ACCESS_LOCAL_READ | IWCH_MEM_ACCESS_LOCAL_WRITE),
- IWCH_MEM_ACCESS_REMOTE =
- (IWCH_MEM_ACCESS_REMOTE_WRITE | IWCH_MEM_ACCESS_REMOTE_READ)
- /* cannot go beyond 1 << 31 */
-} __attribute__ ((packed));
-
-static inline u32 iwch_convert_access(int acc)
+static inline u32 iwch_ib_to_tpt_access(int acc)
{
- return (acc & IB_ACCESS_REMOTE_WRITE ? IWCH_MEM_ACCESS_REMOTE_WRITE : 0)
- | (acc & IB_ACCESS_REMOTE_READ ? IWCH_MEM_ACCESS_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? IWCH_MEM_ACCESS_LOCAL_WRITE : 0) |
- (acc & IB_ACCESS_MW_BIND ? IWCH_MEM_ACCESS_BINDING : 0) |
- IWCH_MEM_ACCESS_LOCAL_READ;
+ return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
+ TPT_LOCAL_READ;
+}
+
+static inline u32 iwch_ib_to_mwbind_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) |
+ T3_MEM_ACCESS_LOCAL_READ;
}
enum iwch_mmid_state {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index e066727504b..0a472c9b44d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -37,8 +36,8 @@
#define NO_SUPPORT -1
-static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 * flit_cnt)
+static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 * flit_cnt)
{
int i;
u32 plen;
@@ -97,8 +96,8 @@ static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 *flit_cnt)
+static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
{
int i;
u32 plen;
@@ -138,8 +137,8 @@ static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 *flit_cnt)
+static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
{
if (wr->num_sge > 1)
return -EINVAL;
@@ -159,9 +158,8 @@ static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
/*
* TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
*/
-static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
- struct ib_sge *sg_list, u32 num_sgle,
- u32 * pbl_addr, u8 * page_size)
+static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
+ u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
int i;
struct iwch_mr *mhp;
@@ -207,9 +205,8 @@ static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
return 0;
}
-static inline int iwch_build_rdma_recv(struct iwch_dev *rhp,
- union t3_wr *wqe,
- struct ib_recv_wr *wr)
+static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
+ struct ib_recv_wr *wr)
{
int i, err = 0;
u32 pbl_addr[4];
@@ -442,7 +439,7 @@ int iwch_bind_mw(struct ib_qp *qp,
wqe->bind.type = T3_VA_BASED_TO;
/* TBD: check perms */
- wqe->bind.perms = iwch_convert_access(mw_bind->mw_access_flags);
+ wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
@@ -474,8 +471,7 @@ int iwch_bind_mw(struct ib_qp *qp,
return err;
}
-static inline void build_term_codes(int t3err, u8 *layer_type, u8 *ecode,
- int tagged)
+static void build_term_codes(int t3err, u8 *layer_type, u8 *ecode, int tagged)
{
switch (t3err) {
case TPT_ERR_STAG:
@@ -673,7 +669,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
spin_lock_irqsave(&qhp->lock, *flag);
}
-static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
{
if (t3b_device(qhp->rhp))
cxio_set_wq_in_error(&qhp->wq);
@@ -685,7 +681,7 @@ static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
/*
* Return non zero if at least one RECV was pre-posted.
*/
-static inline int rqes_posted(struct iwch_qp *qhp)
+static int rqes_posted(struct iwch_qp *qhp)
{
return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
}
@@ -846,6 +842,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
break;
case IWCH_QP_STATE_TERMINATE:
qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+ if (t3b_device(qhp->rhp))
+ cxio_set_wq_in_error(&qhp->wq);
if (!internal)
terminate = 1;
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
index c4e7fbea8bb..cb7086f558c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_user.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 727b10d8968..1a854598e0e 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
To compile the driver as a module, choose M here. The module
will be called ib_ehca.
-config INFINIBAND_EHCA_SCALING
- bool "Scaling support (EXPERIMENTAL)"
- depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
- default y
- ---help---
- eHCA scaling support schedules the CQ callbacks to different CPUs.
-
- To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index cf95ee474b0..10fb8fbafa0 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -42,8 +42,6 @@
#ifndef __EHCA_CLASSES_H__
#define __EHCA_CLASSES_H__
-#include "ehca_classes.h"
-#include "ipz_pt_fn.h"
struct ehca_module;
struct ehca_qp;
@@ -54,15 +52,25 @@ struct ehca_mw;
struct ehca_pd;
struct ehca_av;
-#ifdef CONFIG_PPC64
-#include "ehca_classes_pSeries.h"
-#endif
+#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+#include "ipz_pt_fn.h"
+#include "ehca_qes.h"
#include "ehca_irq.h"
+#define EHCA_EQE_CACHE_SIZE 20
+
+struct ehca_eqe_cache_entry {
+ struct ehca_eqe *eqe;
+ struct ehca_cq *cq;
+};
+
struct ehca_eq {
u32 length;
struct ipz_queue ipz_queue;
@@ -74,6 +82,8 @@ struct ehca_eq {
spinlock_t spinlock;
struct tasklet_struct interrupt_task;
u32 ist;
+ spinlock_t irq_spinlock;
+ struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
};
struct ehca_sport {
@@ -96,6 +106,7 @@ struct ehca_shca {
struct ehca_mr *maxmr;
struct ehca_pd *pd;
struct h_galpas galpas;
+ struct mutex modify_mutex;
};
struct ehca_pd {
@@ -145,7 +156,9 @@ struct ehca_cq {
spinlock_t cb_lock;
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
- u32 nr_callbacks;
+ u32 nr_callbacks; /* #events assigned to cpu by scaling code */
+ u32 nr_events; /* #events seen */
+ wait_queue_head_t wait_completion;
spinlock_t task_lock;
u32 ownpid;
/* mmap counter for resources mapped into user space */
@@ -269,6 +282,7 @@ extern struct idr ehca_cq_idr;
extern int ehca_static_rate;
extern int ehca_port_act_time;
extern int ehca_use_hp_mr;
+extern int ehca_scaling_code;
struct ipzu_queue_resp {
u32 qe_size; /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 6ebfa27e4e1..e2cdc1a16fe 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
spin_lock_init(&my_cq->spinlock);
spin_lock_init(&my_cq->cb_lock);
spin_lock_init(&my_cq->task_lock);
+ init_waitqueue_head(&my_cq->wait_completion);
my_cq->ownpid = current->tgid;
cq = &my_cq->ib_cq;
@@ -302,6 +303,16 @@ create_cq_exit1:
return cq;
}
+static int get_cq_nr_events(struct ehca_cq *my_cq)
+{
+ int ret;
+ unsigned long flags;
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ ret = my_cq->nr_events;
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ return ret;
+}
+
int ehca_destroy_cq(struct ib_cq *cq)
{
u64 h_ret;
@@ -329,10 +340,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
}
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- while (my_cq->nr_callbacks) {
+ while (my_cq->nr_events) {
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- yield();
+ wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ /* recheck nr_events to assure no cqe has just arrived */
}
idr_remove(&ehca_cq_idr, my_cq->token);
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 24ceab0bae4..4961eb88827 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shca,
struct ib_device *ib_dev = &shca->ib_device;
spin_lock_init(&eq->spinlock);
+ spin_lock_init(&eq->irq_spinlock);
eq->is_initialized = 0;
if (type != EHCA_EQ && type != EHCA_NEQ) {
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index b7be950ab47..32b55a4f0e5 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev,
break;
}
+ props->port_cap_flags = rblock->capability_mask;
props->gid_tbl_len = rblock->gid_tbl_len;
props->max_msg_sz = rblock->max_msg_sz;
props->bad_pkey_cntr = rblock->bad_pkey_cntr;
@@ -162,6 +163,9 @@ int ehca_query_port(struct ib_device *ibdev,
props->active_width = IB_WIDTH_12X;
props->active_speed = 0x1;
+ /* at the moment (logical) link state is always LINK_UP */
+ props->phys_state = 0x5;
+
query_port1:
ehca_free_fw_ctrlblock(rblock);
@@ -233,10 +237,60 @@ query_gid1:
return ret;
}
+const u32 allowed_port_caps = (
+ IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
+ IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP);
+
int ehca_modify_port(struct ib_device *ibdev,
u8 port, int port_modify_mask,
struct ib_port_modify *props)
{
- /* Not implemented yet */
- return -EFAULT;
+ int ret = 0;
+ struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+ struct hipz_query_port *rblock;
+ u32 cap;
+ u64 hret;
+
+ if ((props->set_port_cap_mask | props->clr_port_cap_mask)
+ & ~allowed_port_caps) {
+ ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
+ "set=%x clr=%x allowed=%x", props->set_port_cap_mask,
+ props->clr_port_cap_mask, allowed_port_caps);
+ return -EINVAL;
+ }
+
+ if (mutex_lock_interruptible(&shca->modify_mutex))
+ return -ERESTARTSYS;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ ret = -ENOMEM;
+ goto modify_port1;
+ }
+
+ if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto modify_port2;
+ }
+
+ cap = (rblock->capability_mask | props->set_port_cap_mask)
+ & ~props->clr_port_cap_mask;
+
+ hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
+ cap, props->init_type, port_modify_mask);
+ if (hret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret);
+ ret = -EINVAL;
+ }
+
+modify_port2:
+ ehca_free_fw_ctrlblock(rblock);
+
+modify_port1:
+ mutex_unlock(&shca->modify_mutex);
+
+ return ret;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 6c4f9f91b15..f284be1c916 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -63,13 +63,11 @@
#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
static void queue_comp_task(struct ehca_cq *__cq);
static struct ehca_comp_pool* pool;
+#ifdef CONFIG_HOTPLUG_CPU
static struct notifier_block comp_pool_callback_nb;
-
#endif
static inline void comp_event_callback(struct ehca_cq *cq)
@@ -206,7 +204,7 @@ static void qp_event_callback(struct ehca_shca *shca,
}
static void cq_event_callback(struct ehca_shca *shca,
- u64 eqe)
+ u64 eqe)
{
struct ehca_cq *cq;
unsigned long flags;
@@ -318,7 +316,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
"disruptive port %x configuration change", port);
ehca_info(&shca->ib_device,
- "port %x is inactive.", port);
+ "port %x is inactive.", port);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
event.element.port_num = port;
@@ -326,7 +324,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
ib_dispatch_event(&event);
ehca_info(&shca->ib_device,
- "port %x is active.", port);
+ "port %x is active.", port);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ACTIVE;
event.element.port_num = port;
@@ -401,200 +399,274 @@ irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
return IRQ_HANDLED;
}
-void ehca_tasklet_eq(unsigned long data)
+
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
- struct ehca_shca *shca = (struct ehca_shca*)data;
- struct ehca_eqe *eqe;
- int int_state;
- int query_cnt = 0;
+ u64 eqe_value;
+ u32 token;
+ unsigned long flags;
+ struct ehca_cq *cq;
- do {
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+ eqe_value = eqe->entry;
+ ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ ehca_dbg(&shca->ib_device, "Got completion event");
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ cq = idr_find(&ehca_cq_idr, token);
+ if (cq == NULL) {
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq token=%x",
+ token);
+ return;
+ }
+ reset_eq_pending(cq);
+ cq->nr_events++;
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ if (ehca_scaling_code)
+ queue_comp_task(cq);
+ else {
+ comp_event_callback(cq);
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ cq->nr_events--;
+ if (!cq->nr_events)
+ wake_up(&cq->wait_completion);
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ }
+ } else {
+ ehca_dbg(&shca->ib_device, "Got non completion event");
+ parse_identifier(shca, eqe_value);
+ }
+}
- if ((shca->hw_level >= 2) && eqe)
- int_state = 1;
- else
- int_state = 0;
-
- while ((int_state == 1) || eqe) {
- while (eqe) {
- u64 eqe_value = eqe->entry;
-
- ehca_dbg(&shca->ib_device,
- "eqe_value=%lx", eqe_value);
-
- /* TODO: better structure */
- if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
- eqe_value)) {
- unsigned long flags;
- u32 token;
- struct ehca_cq *cq;
-
- ehca_dbg(&shca->ib_device,
- "... completion event");
- token =
- EHCA_BMASK_GET(EQE_CQ_TOKEN,
- eqe_value);
- spin_lock_irqsave(&ehca_cq_idr_lock,
- flags);
- cq = idr_find(&ehca_cq_idr, token);
-
- if (cq == NULL) {
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
- break;
- }
-
- reset_eq_pending(cq);
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
- queue_comp_task(cq);
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
-#else
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
- comp_event_callback(cq);
-#endif
- } else {
- ehca_dbg(&shca->ib_device,
- "... non completion event");
- parse_identifier(shca, eqe_value);
- }
- eqe =
- (struct ehca_eqe *)ehca_poll_eq(shca,
- &shca->eq);
- }
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+ struct ehca_eq *eq = &shca->eq;
+ struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+ u64 eqe_value;
+ unsigned long flags;
+ int eqe_cnt, i;
+ int eq_empty = 0;
+
+ spin_lock_irqsave(&eq->irq_spinlock, flags);
+ if (is_irq) {
+ const int max_query_cnt = 100;
+ int query_cnt = 0;
+ int int_state = 1;
+ do {
+ int_state = hipz_h_query_int_state(
+ shca->ipz_hca_handle, eq->ist);
+ query_cnt++;
+ iosync();
+ } while (int_state && query_cnt < max_query_cnt);
+ if (unlikely((query_cnt == max_query_cnt)))
+ ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
+ int_state, query_cnt);
+ }
- if (shca->hw_level >= 2) {
- int_state =
- hipz_h_query_int_state(shca->ipz_hca_handle,
- shca->eq.ist);
- query_cnt++;
- iosync();
- if (query_cnt >= 100) {
- query_cnt = 0;
- int_state = 0;
- }
+ /* read out all eqes */
+ eqe_cnt = 0;
+ do {
+ u32 token;
+ eqe_cache[eqe_cnt].eqe =
+ (struct ehca_eqe *)ehca_poll_eq(shca, eq);
+ if (!eqe_cache[eqe_cnt].eqe)
+ break;
+ eqe_value = eqe_cache[eqe_cnt].eqe->entry;
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ spin_lock(&ehca_cq_idr_lock);
+ eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+ if (!eqe_cache[eqe_cnt].cq) {
+ spin_unlock(&ehca_cq_idr_lock);
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq "
+ "token=%x", token);
+ continue;
}
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
-
+ eqe_cache[eqe_cnt].cq->nr_events++;
+ spin_unlock(&ehca_cq_idr_lock);
+ } else
+ eqe_cache[eqe_cnt].cq = NULL;
+ eqe_cnt++;
+ } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
+ if (!eqe_cnt) {
+ if (is_irq)
+ ehca_dbg(&shca->ib_device,
+ "No eqe found for irq event");
+ goto unlock_irq_spinlock;
+ } else if (!is_irq)
+ ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+ if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
+ ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
+ /* enable irq for new packets */
+ for (i = 0; i < eqe_cnt; i++) {
+ if (eq->eqe_cache[i].cq)
+ reset_eq_pending(eq->eqe_cache[i].cq);
+ }
+ /* check eq */
+ spin_lock(&eq->spinlock);
+ eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
+ spin_unlock(&eq->spinlock);
+ /* call completion handler for cached eqes */
+ for (i = 0; i < eqe_cnt; i++)
+ if (eq->eqe_cache[i].cq) {
+ if (ehca_scaling_code)
+ queue_comp_task(eq->eqe_cache[i].cq);
+ else {
+ struct ehca_cq *cq = eq->eqe_cache[i].cq;
+ comp_event_callback(cq);
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ cq->nr_events--;
+ if (!cq->nr_events)
+ wake_up(&cq->wait_completion);
+ spin_unlock_irqrestore(&ehca_cq_idr_lock,
+ flags);
+ }
+ } else {
+ ehca_dbg(&shca->ib_device, "Got non completion event");
+ parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
}
- } while (int_state != 0);
+ /* poll eq if not empty */
+ if (eq_empty)
+ goto unlock_irq_spinlock;
+ do {
+ struct ehca_eqe *eqe;
+ eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+ if (!eqe)
+ break;
+ process_eqe(shca, eqe);
+ } while (1);
- return;
+unlock_irq_spinlock:
+ spin_unlock_irqrestore(&eq->irq_spinlock, flags);
}
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+void ehca_tasklet_eq(unsigned long data)
+{
+ ehca_process_eq((struct ehca_shca*)data, 1);
+}
static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
{
- unsigned long flags_last_cpu;
+ int cpu;
+ unsigned long flags;
+ WARN_ON_ONCE(!in_interrupt());
if (ehca_debug_level)
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
- spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
- pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
- if (pool->last_cpu == NR_CPUS)
- pool->last_cpu = first_cpu(cpu_online_map);
- spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+ spin_lock_irqsave(&pool->last_cpu_lock, flags);
+ cpu = next_cpu(pool->last_cpu, cpu_online_map);
+ if (cpu == NR_CPUS)
+ cpu = first_cpu(cpu_online_map);
+ pool->last_cpu = cpu;
+ spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
- return pool->last_cpu;
+ return cpu;
}
static void __queue_comp_task(struct ehca_cq *__cq,
struct ehca_cpu_comp_task *cct)
{
- unsigned long flags_cct;
- unsigned long flags_cq;
+ unsigned long flags;
- spin_lock_irqsave(&cct->task_lock, flags_cct);
- spin_lock_irqsave(&__cq->task_lock, flags_cq);
+ spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock(&__cq->task_lock);
if (__cq->nr_callbacks == 0) {
__cq->nr_callbacks++;
list_add_tail(&__cq->entry, &cct->cq_list);
cct->cq_jobs++;
wake_up(&cct->wait_queue);
- }
- else
+ } else
__cq->nr_callbacks++;
- spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock(&__cq->task_lock);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
}
static void queue_comp_task(struct ehca_cq *__cq)
{
- int cpu;
int cpu_id;
struct ehca_cpu_comp_task *cct;
+ int cq_jobs;
+ unsigned long flags;
- cpu = get_cpu();
cpu_id = find_next_online_cpu(pool);
-
BUG_ON(!cpu_online(cpu_id));
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
- if (cct->cq_jobs > 0) {
+ spin_lock_irqsave(&cct->task_lock, flags);
+ cq_jobs = cct->cq_jobs;
+ spin_unlock_irqrestore(&cct->task_lock, flags);
+ if (cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
}
__queue_comp_task(__cq, cct);
-
- put_cpu();
-
- return;
}
static void run_comp_task(struct ehca_cpu_comp_task* cct)
{
struct ehca_cq *cq;
- unsigned long flags_cct;
- unsigned long flags_cq;
+ unsigned long flags;
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ spin_lock_irqsave(&cct->task_lock, flags);
while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
comp_event_callback(cq);
- spin_lock_irqsave(&cct->task_lock, flags_cct);
- spin_lock_irqsave(&cq->task_lock, flags_cq);
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ cq->nr_events--;
+ if (!cq->nr_events)
+ wake_up(&cq->wait_completion);
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+ spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock(&cq->task_lock);
cq->nr_callbacks--;
- if (cq->nr_callbacks == 0) {
+ if (!cq->nr_callbacks) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
- spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+ spin_unlock(&cq->task_lock);
}
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
- return;
+ spin_unlock_irqrestore(&cct->task_lock, flags);
}
static int comp_task(void *__cct)
{
struct ehca_cpu_comp_task* cct = __cct;
+ int cql_empty;
DECLARE_WAITQUEUE(wait, current);
set_current_state(TASK_INTERRUPTIBLE);
while(!kthread_should_stop()) {
add_wait_queue(&cct->wait_queue, &wait);
- if (list_empty(&cct->cq_list))
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (cql_empty)
schedule();
else
__set_current_state(TASK_RUNNING);
remove_wait_queue(&cct->wait_queue, &wait);
- if (!list_empty(&cct->cq_list))
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (!cql_empty)
run_comp_task(__cct);
set_current_state(TASK_INTERRUPTIBLE);
@@ -637,8 +709,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
if (task)
kthread_stop(task);
-
- return;
}
static void take_over_work(struct ehca_comp_pool *pool,
@@ -654,17 +724,18 @@ static void take_over_work(struct ehca_comp_pool *pool,
list_splice_init(&cct->cq_list, &list);
while(!list_empty(&list)) {
- cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+ cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
- list_del(&cq->entry);
- __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
- smp_processor_id()));
+ list_del(&cq->entry);
+ __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+ smp_processor_id()));
}
spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}
+#ifdef CONFIG_HOTPLUG_CPU
static int comp_pool_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
@@ -707,15 +778,16 @@ static int comp_pool_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-
#endif
int ehca_create_comp_pool(void)
{
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
int cpu;
struct task_struct *task;
+ if (!ehca_scaling_code)
+ return 0;
+
pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
if (pool == NULL)
return -ENOMEM;
@@ -737,20 +809,27 @@ int ehca_create_comp_pool(void)
}
}
+#ifdef CONFIG_HOTPLUG_CPU
comp_pool_callback_nb.notifier_call = comp_pool_callback;
comp_pool_callback_nb.priority =0;
register_cpu_notifier(&comp_pool_callback_nb);
#endif
+ printk(KERN_INFO "eHCA scaling code enabled\n");
+
return 0;
}
void ehca_destroy_comp_pool(void)
{
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
int i;
+ if (!ehca_scaling_code)
+ return;
+
+#ifdef CONFIG_HOTPLUG_CPU
unregister_cpu_notifier(&comp_pool_callback_nb);
+#endif
for (i = 0; i < NR_CPUS; i++) {
if (cpu_online(i))
@@ -758,7 +837,4 @@ void ehca_destroy_comp_pool(void)
}
free_percpu(pool->cpu_comp_tasks);
kfree(pool);
-#endif
-
- return;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index be579cc0adf..6ed06ee033e 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -56,6 +56,7 @@ void ehca_tasklet_neq(unsigned long data);
irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
void ehca_tasklet_eq(unsigned long data);
+void ehca_process_eq(struct ehca_shca *shca, int is_irq);
struct ehca_cpu_comp_task {
wait_queue_head_t wait_queue;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 1155bcf4821..4700085ba83 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0020");
+MODULE_VERSION("SVNEHCA_0022");
int ehca_open_aqp1 = 0;
int ehca_debug_level = 0;
@@ -62,6 +62,7 @@ int ehca_use_hp_mr = 0;
int ehca_port_act_time = 30;
int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
+int ehca_scaling_code = 1;
module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -71,6 +72,7 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
module_param_named(port_act_time, ehca_port_act_time, int, 0);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
module_param_named(static_rate, ehca_static_rate, int, 0);
+module_param_named(scaling_code, ehca_scaling_code, int, 0);
MODULE_PARM_DESC(open_aqp1,
"AQP1 on startup (0: no (default), 1: yes)");
@@ -91,6 +93,8 @@ MODULE_PARM_DESC(poll_all_eqs,
" (0: no, 1: yes (default))");
MODULE_PARM_DESC(static_rate,
"set permanent static rate (default: disabled)");
+MODULE_PARM_DESC(scaling_code,
+ "set scaling code (0: disabled, 1: enabled/default)");
spinlock_t ehca_qp_idr_lock;
spinlock_t ehca_cq_idr_lock;
@@ -432,8 +436,8 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n",
- ehca_debug_level);
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ ehca_debug_level);
}
static ssize_t ehca_store_debug_level(struct device_driver *ddp,
@@ -561,11 +565,11 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
const struct of_device_id *id)
{
struct ehca_shca *shca;
- u64 *handle;
+ const u64 *handle;
struct ib_pd *ibpd;
int ret;
- handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+ handle = get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
if (!handle) {
ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
dev->ofdev.node->full_name);
@@ -583,6 +587,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
ehca_gen_err("Cannot allocate shca memory.");
return -ENOMEM;
}
+ mutex_init(&shca->modify_mutex);
shca->ibmebus_dev = dev;
shca->ipz_hca_handle.handle = *handle;
@@ -778,8 +783,24 @@ void ehca_poll_eqs(unsigned long data)
spin_lock(&shca_list_lock);
list_for_each_entry(shca, &shca_list, shca_list) {
- if (shca->eq.is_initialized)
- ehca_tasklet_eq((unsigned long)(void*)shca);
+ if (shca->eq.is_initialized) {
+ /* call deadman proc only if eq ptr does not change */
+ struct ehca_eq *eq = &shca->eq;
+ int max = 3;
+ volatile u64 q_ofs, q_ofs2;
+ u64 flags;
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ do {
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs2 = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ max--;
+ } while (q_ofs == q_ofs2 && max > 0);
+ if (q_ofs == q_ofs2)
+ ehca_process_eq(shca, 0);
+ }
}
mod_timer(&poll_eqs_timer, jiffies + HZ);
spin_unlock(&shca_list_lock);
@@ -790,7 +811,7 @@ int __init ehca_module_init(void)
int ret;
printk(KERN_INFO "eHCA Infiniband Device Driver "
- "(Rel.: SVNEHCA_0020)\n");
+ "(Rel.: SVNEHCA_0022)\n");
idr_init(&ehca_qp_idr);
idr_init(&ehca_cq_idr);
spin_lock_init(&ehca_qp_idr_lock);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 3fb46e67df8..b564fcd3b28 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -70,6 +70,10 @@
#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31)
#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63)
+#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47)
+#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
+#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
+
/* direct access qp controls */
#define DAQP_CTRL_ENABLE 0x01
#define DAQP_CTRL_SEND_COMP 0x20
@@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
return ret;
}
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id, const u32 port_cap,
+ const u8 init_type, const int modify_mask)
+{
+ u64 port_attributes = port_cap;
+
+ if (modify_mask & IB_PORT_SHUTDOWN)
+ port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
+ if (modify_mask & IB_PORT_INIT_TYPE)
+ port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
+ if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
+ port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
+
+ return ehca_plpar_hcall_norets(H_MODIFY_PORT,
+ adapter_handle.handle, /* r4 */
+ port_id, /* r5 */
+ port_attributes, /* r6 */
+ 0, 0, 0, 0);
+}
+
u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
struct hipz_query_hca *query_hca_rblock)
{
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 587ebd47095..2869f7dd619 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -85,6 +85,10 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
const u8 port_id,
struct hipz_query_port *query_port_response_block);
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id, const u32 port_cap,
+ const u8 init_type, const int modify_mask);
+
u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
struct hipz_query_hca *query_hca_rblock);
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index dc3bda2634b..8199c45768a 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -79,7 +79,7 @@ static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
if (q_offset >= queue->queue_length)
return NULL;
current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
- return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+ return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
}
/*
@@ -247,6 +247,15 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
return ret;
}
+static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
+{
+ void *ret = ipz_qeit_get(queue);
+ u32 qe = *(u8 *) ret;
+ if ((qe >> 7) != (queue->toggle_state & 1))
+ return NULL;
+ return ret;
+}
+
/* returns address (GX) of first queue entry */
static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
{
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 54139d39818..10c008f22ba 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -78,6 +78,8 @@
#define IPATH_IB_LINKINIT 3
#define IPATH_IB_LINKDOWN_SLEEP 4
#define IPATH_IB_LINKDOWN_DISABLE 5
+#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
+#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
/*
* stats maintained by the driver. For now, at least, this is global
@@ -316,11 +318,17 @@ struct ipath_base_info {
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
- /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
+ /* shared memory pages for subports if port is shared */
__u64 spi_subport_uregbase;
__u64 spi_subport_rcvegrbuf;
__u64 spi_subport_rcvhdr_base;
+ /* shared memory page for hardware port if it is shared */
+ __u64 spi_port_uregbase;
+ __u64 spi_port_rcvegrbuf;
+ __u64 spi_port_rcvhdr_base;
+ __u64 spi_port_rcvhdr_tailaddr;
+
} __attribute__ ((aligned(8)));
@@ -344,7 +352,7 @@ struct ipath_base_info {
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define IPATH_USER_SWMINOR 3
+#define IPATH_USER_SWMINOR 5
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
@@ -418,11 +426,14 @@ struct ipath_user_info {
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
-#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */
+#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
#define IPATH_CMD_USER_INIT 24 /* set up userspace */
+#define IPATH_CMD_UNUSED_1 25
+#define IPATH_CMD_UNUSED_2 26
+#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
-#define IPATH_CMD_MAX 24
+#define IPATH_CMD_MAX 27
struct ipath_port_info {
__u32 num_active; /* number of active units */
@@ -430,7 +441,7 @@ struct ipath_port_info {
__u16 port; /* port on unit assigned to caller */
__u16 subport; /* subport on unit assigned to caller */
__u16 num_ports; /* number of ports available on unit */
- __u16 num_subports; /* number of subport slaves opened on port */
+ __u16 num_subports; /* number of subports opened on port */
};
struct ipath_tid_info {
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 87462e0cb4d..ea78e6dddc9 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
}
return;
}
- wc->queue[head] = *entry;
+ wc->queue[head].wr_id = entry->wr_id;
+ wc->queue[head].status = entry->status;
+ wc->queue[head].opcode = entry->opcode;
+ wc->queue[head].vendor_err = entry->vendor_err;
+ wc->queue[head].byte_len = entry->byte_len;
+ wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
+ wc->queue[head].qp_num = entry->qp->qp_num;
+ wc->queue[head].src_qp = entry->src_qp;
+ wc->queue[head].wc_flags = entry->wc_flags;
+ wc->queue[head].pkey_index = entry->pkey_index;
+ wc->queue[head].slid = entry->slid;
+ wc->queue[head].sl = entry->sl;
+ wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
+ wc->queue[head].port_num = entry->port_num;
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+ struct ipath_qp *qp;
+
if (tail == wc->head)
break;
- *entry = wc->queue[tail];
+
+ qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
+ wc->queue[tail].qp_num);
+ entry->qp = &qp->ibqp;
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+
+ entry->wr_id = wc->queue[tail].wr_id;
+ entry->status = wc->queue[tail].status;
+ entry->opcode = wc->queue[tail].opcode;
+ entry->vendor_err = wc->queue[tail].vendor_err;
+ entry->byte_len = wc->queue[tail].byte_len;
+ entry->imm_data = wc->queue[tail].imm_data;
+ entry->src_qp = wc->queue[tail].src_qp;
+ entry->wc_flags = wc->queue[tail].wc_flags;
+ entry->pkey_index = wc->queue[tail].pkey_index;
+ entry->slid = wc->queue[tail].slid;
+ entry->sl = wc->queue[tail].sl;
+ entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
+ entry->port_num = wc->queue[tail].port_num;
if (tail >= cq->ibcq.cqe)
tail = 0;
else
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8..42bfbdb0d3e 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
#define __IPATH_PROCDBG 0x100
/* print mmap/nopage stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x200
+#define __IPATH_ERRPKTDBG 0x400
#define __IPATH_USER_SEND 0x1000 /* use user mode send */
#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 0f13a2182cc..63e8368b0e9 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
}
fp->private_data = dd;
- ipath_diag_inuse = 1;
+ ipath_diag_inuse = -2;
diag_set_link = 0;
ret = 0;
@@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
+ else if (ipath_diag_inuse < 1 && (*off || count != 8))
+ ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit reads */
ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
@@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
if (ret >= 0) {
*off += count;
ret = count;
+ if (ipath_diag_inuse == -2)
+ ipath_diag_inuse++;
}
return ret;
@@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
+ else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
+ ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
+ ret = -EINVAL; /* before any other write allowed */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit writes */
ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
@@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
if (ret >= 0) {
*off += count;
ret = count;
+ if (ipath_diag_inuse == -1)
+ ipath_diag_inuse = 1; /* all read/write OK now */
}
return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 6e0f2b8918c..f87f003e3ef 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -96,8 +96,8 @@ static void ipath_dma_unmap_page(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
+static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
{
u64 addr;
int i;
@@ -167,7 +167,7 @@ static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
}
static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle)
+ void *cpu_addr, u64 dma_handle)
{
free_pages((unsigned long) cpu_addr, get_order(size));
}
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ae7f21a0cdc..e3a22320971 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
/* setup the chip-specific functions, as early as possible. */
switch (ent->device) {
-#ifdef CONFIG_HT_IRQ
case PCI_DEVICE_ID_INFINIPATH_HT:
+#ifdef CONFIG_HT_IRQ
ipath_init_iba6110_funcs(dd);
break;
+#else
+ ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
+ "CONFIG_HT_IRQ is not enabled\n", ent->device);
+ return -ENODEV;
#endif
-#ifdef CONFIG_PCI_MSI
case PCI_DEVICE_ID_INFINIPATH_PE800:
+#ifdef CONFIG_PCI_MSI
ipath_init_iba6120_funcs(dd);
break;
+#else
+ ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
+ "CONFIG_PCI_MSI is not enabled\n", ent->device);
+ return -ENODEV;
#endif
default:
ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
if (ret)
- goto bail_iounmap;
+ goto bail_irqsetup;
ret = ipath_enable_wc(dd);
@@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail;
+bail_irqsetup:
+ if (pdev->irq) free_irq(pdev->irq, dd);
+
bail_iounmap:
iounmap((volatile void __iomem *) dd->ipath_kregbase);
@@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
{
int port;
- ipath_shutdown_device(dd);
-
if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
/* can't do anything more with chip; needs re-init */
*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
@@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
dd->ipath_pageshadow);
- vfree(dd->ipath_pageshadow);
+ tmpp = dd->ipath_pageshadow;
dd->ipath_pageshadow = NULL;
+ vfree(tmpp);
}
/*
@@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
+ /*
+ * disable the IB link early, to be sure no new packets arrive, which
+ * complicates the shutdown process
+ */
+ ipath_shutdown_device(dd);
+
if (dd->verbs_dev)
ipath_unregister_ib_device(dd->verbs_dev);
@@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
}
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print * it or not depending on "normal packet errors" vs everything
+ * else. Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
{
+ int iserr = 1;
*buf = '\0';
+ if (err & INFINIPATH_E_PKTERRS) {
+ if (!(err & ~INFINIPATH_E_PKTERRS))
+ iserr = 0; // if only packet errors.
+ if (ipath_debug & __IPATH_ERRPKTDBG) {
+ if (err & INFINIPATH_E_REBP)
+ strlcat(buf, "EBP ", blen);
+ if (err & INFINIPATH_E_RVCRC)
+ strlcat(buf, "VCRC ", blen);
+ if (err & INFINIPATH_E_RICRC) {
+ strlcat(buf, "CRC ", blen);
+ // clear for check below, so only once
+ err &= INFINIPATH_E_RICRC;
+ }
+ if (err & INFINIPATH_E_RSHORTPKTLEN)
+ strlcat(buf, "rshortpktlen ", blen);
+ if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+ strlcat(buf, "sdroppeddatapkt ", blen);
+ if (err & INFINIPATH_E_SPKTLEN)
+ strlcat(buf, "spktlen ", blen);
+ }
+ if ((err & INFINIPATH_E_RICRC) &&
+ !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
+ strlcat(buf, "CRC ", blen);
+ if (!iserr)
+ goto done;
+ }
if (err & INFINIPATH_E_RHDRLEN)
strlcat(buf, "rhdrlen ", blen);
if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "rhdr ", blen);
if (err & INFINIPATH_E_RLONGPKTLEN)
strlcat(buf, "rlongpktlen ", blen);
- if (err & INFINIPATH_E_RSHORTPKTLEN)
- strlcat(buf, "rshortpktlen ", blen);
if (err & INFINIPATH_E_RMAXPKTLEN)
strlcat(buf, "rmaxpktlen ", blen);
if (err & INFINIPATH_E_RMINPKTLEN)
strlcat(buf, "rminpktlen ", blen);
+ if (err & INFINIPATH_E_SMINPKTLEN)
+ strlcat(buf, "sminpktlen ", blen);
if (err & INFINIPATH_E_RFORMATERR)
strlcat(buf, "rformaterr ", blen);
if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "runexpchar ", blen);
if (err & INFINIPATH_E_RIBFLOW)
strlcat(buf, "ribflow ", blen);
- if (err & INFINIPATH_E_REBP)
- strlcat(buf, "EBP ", blen);
if (err & INFINIPATH_E_SUNDERRUN)
strlcat(buf, "sunderrun ", blen);
if (err & INFINIPATH_E_SPIOARMLAUNCH)
strlcat(buf, "spioarmlaunch ", blen);
if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
strlcat(buf, "sunexperrpktnum ", blen);
- if (err & INFINIPATH_E_SDROPPEDDATAPKT)
- strlcat(buf, "sdroppeddatapkt ", blen);
if (err & INFINIPATH_E_SDROPPEDSMPPKT)
strlcat(buf, "sdroppedsmppkt ", blen);
if (err & INFINIPATH_E_SMAXPKTLEN)
strlcat(buf, "smaxpktlen ", blen);
- if (err & INFINIPATH_E_SMINPKTLEN)
- strlcat(buf, "sminpktlen ", blen);
if (err & INFINIPATH_E_SUNSUPVL)
strlcat(buf, "sunsupVL ", blen);
- if (err & INFINIPATH_E_SPKTLEN)
- strlcat(buf, "spktlen ", blen);
if (err & INFINIPATH_E_INVALIDADDR)
strlcat(buf, "invalidaddr ", blen);
- if (err & INFINIPATH_E_RICRC)
- strlcat(buf, "CRC ", blen);
- if (err & INFINIPATH_E_RVCRC)
- strlcat(buf, "VCRC ", blen);
if (err & INFINIPATH_E_RRCVEGRFULL)
strlcat(buf, "rcvegrfull ", blen);
if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
strlcat(buf, "hardware ", blen);
if (err & INFINIPATH_E_RESET)
strlcat(buf, "reset ", blen);
+done:
+ return iserr;
}
/**
@@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
lstate = IPATH_LINKACTIVE;
break;
+ case IPATH_IB_LINK_LOOPBACK:
+ dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
+ dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ ret = 0;
+ goto bail; // no state change to wait for
+
+ case IPATH_IB_LINK_EXTERNAL:
+ dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
+ dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ ret = 0;
+ goto bail; // no state change to wait for
+
default:
ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
ret = -EINVAL;
@@ -1765,29 +1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
return 0;
}
-/**
- * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
- * @dd: the infinipath device
- * @regno: the register number to read
- * @port: the port containing the register
- *
- * Registers that vary with the chip implementation constants (port)
- * use this routine.
- */
-u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
- unsigned port)
-{
- u16 where;
-
- if (port < dd->ipath_portcnt &&
- (regno == dd->ipath_kregs->kr_rcvhdraddr ||
- regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
- where = regno + port;
- else
- where = -1;
-
- return ipath_read_kreg64(dd, where);
-}
/**
* ipath_write_kreg_port - write a device's per-port 64-bit kernel register
@@ -1973,7 +2005,8 @@ static int __init infinipath_init(void)
{
int ret;
- ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
+ if (ipath_debug & __IPATH_DBG)
+ printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
/*
* These must be called before the driver is registered with
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a4019a6b756..030185f90ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
} else
memcpy(dd->ipath_serial, ifp->if_serial,
sizeof ifp->if_serial);
+ if (!strstr(ifp->if_comment, "Tested successfully"))
+ ipath_dev_err(dd, "Board SN %s did not pass functional "
+ "test: %s\n", dd->ipath_serial,
+ ifp->if_comment);
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 5d64ff87529..1272aaf2a78 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -41,12 +41,6 @@
#include "ipath_kernel.h"
#include "ipath_common.h"
-/*
- * mmap64 doesn't allow all 64 bits for 32-bit applications
- * so only use the low 43 bits.
- */
-#define MMAP64_MASK 0x7FFFFFFFFFFUL
-
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = {
.mmap = ipath_mmap
};
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+ struct page *page;
+ u64 paddr = 0;
+
+ page = vmalloc_to_page(p);
+ if (page)
+ paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+ return paddr;
+}
+
static int ipath_get_base_info(struct file *fp,
void __user *ubase, size_t ubase_size)
{
@@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp,
sz = sizeof(*kinfo);
/* If port sharing is not requested, allow the old size structure */
if (!shared)
- sz -= 3 * sizeof(u64);
+ sz -= 7 * sizeof(u64);
if (ubase_size < sz) {
ipath_cdbg(PROC,
"Base size %zu, need %zu (version mismatch?)\n",
@@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp,
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign *
(dd->ipath_pbufsport - kinfo->spi_piocnt);
- kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
- dd->ipath_palign * pd->port_port;
} else {
unsigned slave = subport_fp(fp) - 1;
kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign * kinfo->spi_piocnt * slave;
- kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
- PAGE_SIZE * slave) & MMAP64_MASK;
+ }
+ if (shared) {
+ kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_palign * pd->port_port;
+ kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
+ kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
+ kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
- kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
- pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
- kinfo->spi_rcvhdr_tailaddr =
- (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
- kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
- dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
- MMAP64_MASK;
+ kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport_fp(fp));
+
+ kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport_fp(fp));
+ kinfo->spi_rcvhdr_tailaddr = 0;
+ kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
+ pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
+ subport_fp(fp));
+
+ kinfo->spi_subport_uregbase =
+ cvt_kvaddr(pd->subport_uregbase);
+ kinfo->spi_subport_rcvegrbuf =
+ cvt_kvaddr(pd->subport_rcvegrbuf);
+ kinfo->spi_subport_rcvhdr_base =
+ cvt_kvaddr(pd->subport_rcvhdr_base);
+ ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+ kinfo->spi_port, kinfo->spi_runtime_flags,
+ (unsigned long long) kinfo->spi_subport_uregbase,
+ (unsigned long long) kinfo->spi_subport_rcvegrbuf,
+ (unsigned long long) kinfo->spi_subport_rcvhdr_base);
}
kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
@@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp,
if (master) {
kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
- kinfo->spi_subport_uregbase =
- (u64) pd->subport_uregbase & MMAP64_MASK;
- kinfo->spi_subport_rcvegrbuf =
- (u64) pd->subport_rcvegrbuf & MMAP64_MASK;
- kinfo->spi_subport_rcvhdr_base =
- (u64) pd->subport_rcvhdr_base & MMAP64_MASK;
- ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
- kinfo->spi_port, kinfo->spi_runtime_flags,
- (unsigned long long) kinfo->spi_subport_uregbase,
- (unsigned long long) kinfo->spi_subport_rcvegrbuf,
- (unsigned long long) kinfo->spi_subport_rcvhdr_base);
}
- if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
+ sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
+ if (copy_to_user(ubase, kinfo, sz))
ret = -EFAULT;
bail:
@@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
struct ipath_devdata *dd;
void *addr;
size_t size;
- int ret;
+ int ret = 0;
/* If the port is not shared, all addresses should be physical */
- if (!pd->port_subport_cnt) {
- ret = -EINVAL;
+ if (!pd->port_subport_cnt)
goto bail;
- }
dd = pd->port_dd;
size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
/*
- * Master has all the slave uregbase, rcvhdrq, and
- * rcvegrbufs mmapped.
+ * Each process has all the subport uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped - as an array for all the processes,
+ * and also separately for this process.
*/
- if (subport == 0) {
- unsigned num_slaves = pd->port_subport_cnt - 1;
-
- if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
- addr = pd->subport_uregbase;
- size = PAGE_SIZE * num_slaves;
- } else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
- MMAP64_MASK)) {
- addr = pd->subport_rcvhdr_base;
- size = pd->port_rcvhdrq_size * num_slaves;
- } else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
- MMAP64_MASK)) {
- addr = pd->subport_rcvegrbuf;
- size *= num_slaves;
- } else {
- ret = -EINVAL;
- goto bail;
- }
- } else if (pgaddr == (((u64) pd->subport_uregbase +
- PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
- addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
- size = PAGE_SIZE;
- } else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
- pd->port_rcvhdrq_size * (subport - 1)) &
- MMAP64_MASK)) {
- addr = pd->subport_rcvhdr_base +
- pd->port_rcvhdrq_size * (subport - 1);
- size = pd->port_rcvhdrq_size;
- } else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
- size * (subport - 1)) & MMAP64_MASK)) {
- addr = pd->subport_rcvegrbuf + size * (subport - 1);
- /* rcvegrbufs are read-only on the slave */
- if (vma->vm_flags & VM_WRITE) {
- dev_info(&dd->pcidev->dev,
- "Can't map eager buffers as "
- "writable (flags=%lx)\n", vma->vm_flags);
- ret = -EPERM;
- goto bail;
- }
- /*
- * Don't allow permission to later change to writeable
- * with mprotect.
- */
- vma->vm_flags &= ~VM_MAYWRITE;
+ if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
+ addr = pd->subport_uregbase;
+ size = PAGE_SIZE * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
+ addr = pd->subport_rcvhdr_base;
+ size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
+ addr = pd->subport_rcvegrbuf;
+ size *= pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport)) {
+ addr = pd->subport_uregbase + PAGE_SIZE * subport;
+ size = PAGE_SIZE;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport)) {
+ addr = pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport;
+ size = pd->port_rcvhdrq_size;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
+ size * subport)) {
+ addr = pd->subport_rcvegrbuf + size * subport;
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
} else {
- ret = -EINVAL;
goto bail;
}
len = vma->vm_end - vma->vm_start;
@@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
vma->vm_ops = &ipath_file_vm_ops;
vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
- ret = 0;
+ ret = 1;
bail:
return ret;
@@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
* Check for kernel virtual addresses first, anything else must
* match a HW or memory address.
*/
- if (pgaddr >= (1ULL<<40)) {
- ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
goto bail;
}
+ ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
if (!pd->port_subport_cnt) {
/* port is not shared */
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
piocnt = dd->ipath_pbufsport;
piobufs = pd->port_piobufs;
} else if (!subport_fp(fp)) {
/* caller is the master */
- ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
(dd->ipath_pbufsport % pd->port_subport_cnt);
piobufs = pd->port_piobufs +
@@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
unsigned slave = subport_fp(fp) - 1;
/* caller is a slave */
- ureg = 0;
piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
}
@@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
(void *) dd->ipath_pioavailregs_dma,
"pioavail registers");
- else if (subport_fp(fp))
- /* Subports don't mmap the physical receive buffers */
- ret = -EINVAL;
else if (pgaddr == pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd);
else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
@@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd,
const struct ipath_user_info *uinfo)
{
int ret = 0;
- unsigned num_slaves;
+ unsigned num_subports;
size_t size;
- /* Old user binaries don't know about subports */
- if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
- goto bail;
/*
* If the user is requesting zero or one port,
* skip the subport allocation.
*/
if (uinfo->spu_subport_cnt <= 1)
goto bail;
- if (uinfo->spu_subport_cnt > 4) {
+
+ /* Old user binaries don't know about new subport implementation */
+ if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
+ dev_info(&dd->pcidev->dev,
+ "Mismatched user minor version (%d) and driver "
+ "minor version (%d) while port sharing. Ensure "
+ "that driver and library are from the same "
+ "release.\n",
+ (int) (uinfo->spu_userversion & 0xffff),
+ IPATH_USER_SWMINOR);
+ goto bail;
+ }
+ if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
ret = -EINVAL;
goto bail;
}
- num_slaves = uinfo->spu_subport_cnt - 1;
- pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
+ num_subports = uinfo->spu_subport_cnt;
+ pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
if (!pd->subport_uregbase) {
ret = -ENOMEM;
goto bail;
}
/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
- sizeof(u32), PAGE_SIZE) * num_slaves;
+ sizeof(u32), PAGE_SIZE) * num_subports;
pd->subport_rcvhdr_base = vmalloc(size);
if (!pd->subport_rcvhdr_base) {
ret = -ENOMEM;
@@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd,
pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
pd->port_rcvegrbuf_size *
- num_slaves);
+ num_subports);
if (!pd->subport_rcvegrbuf) {
ret = -ENOMEM;
goto bail_rhdr;
@@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd,
pd->port_subport_cnt = uinfo->spu_subport_cnt;
pd->port_subport_id = uinfo->spu_subport_id;
pd->active_slaves = 1;
+ set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
+ memset(pd->subport_rcvhdr_base, 0, size);
+ memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
+ pd->port_rcvegrbuf_size *
+ num_subports);
goto bail;
bail_rhdr:
@@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp,
*/
if (!cpus_empty(current->cpus_allowed) &&
!cpus_full(current->cpus_allowed)) {
- int ncpus = num_online_cpus(), curcpu = -1;
+ int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
for (i = 0; i < ncpus; i++)
if (cpu_isset(i, current->cpus_allowed)) {
ipath_cdbg(PROC, "%s[%u] affinity set for "
- "cpu %d\n", current->comm,
- current->pid, i);
+ "cpu %d/%d\n", current->comm,
+ current->pid, i, ncpus);
curcpu = i;
+ nset++;
}
- if (curcpu != -1) {
+ if (curcpu != -1 && nset != ncpus) {
if (npresent) {
prefunit = curcpu / (ncpus / npresent);
- ipath_dbg("%s[%u] %d chips, %d cpus, "
+ ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
"%d cpus/chip, select unit %d\n",
current->comm, current->pid,
npresent, ncpus, ncpus / npresent,
@@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp,
const struct ipath_user_info *uinfo)
{
int ret;
- struct ipath_portdata *pd;
+ struct ipath_portdata *pd = port_fp(fp);
struct ipath_devdata *dd;
u32 head32;
- pd = port_fp(fp);
+ /* Subports don't need to initialize anything since master did it. */
+ if (subport_fp(fp)) {
+ ret = wait_event_interruptible(pd->port_wait,
+ !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
+ goto done;
+ }
+
dd = pd->port_dd;
if (uinfo->spu_rcvhdrsize) {
@@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp,
dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
+ /* Notify any waiting slaves */
+ if (pd->port_subport_cnt) {
+ clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ wake_up(&pd->port_wait);
+ }
done:
return ret;
}
@@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
return ret;
}
+static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
+{
+ u64 reg = dd->ipath_sendctrl;
+
+ clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+
+ return 0;
+}
+
static ssize_t ipath_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
@@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
dest = &cmd.cmd.part_key;
src = &ucmd->cmd.part_key;
break;
- case IPATH_CMD_SLAVE_INFO:
+ case __IPATH_CMD_SLAVE_INFO:
copy = sizeof(cmd.cmd.slave_mask_addr);
dest = &cmd.cmd.slave_mask_addr;
src = &ucmd->cmd.slave_mask_addr;
break;
+ case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg
+ copy = 0;
+ src = NULL;
+ dest = NULL;
+ break;
default:
ret = -EINVAL;
goto bail;
}
- if ((count - consumed) < copy) {
- ret = -EINVAL;
- goto bail;
- }
+ if (copy) {
+ if ((count - consumed) < copy) {
+ ret = -EINVAL;
+ goto bail;
+ }
- if (copy_from_user(dest, src, copy)) {
- ret = -EFAULT;
- goto bail;
+ if (copy_from_user(dest, src, copy)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ consumed += copy;
}
- consumed += copy;
pd = port_fp(fp);
if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
cmd.type != IPATH_CMD_ASSIGN_PORT) {
@@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
case IPATH_CMD_SET_PART_KEY:
ret = ipath_set_part_key(pd, cmd.cmd.part_key);
break;
- case IPATH_CMD_SLAVE_INFO:
+ case __IPATH_CMD_SLAVE_INFO:
ret = ipath_get_slave_info(pd,
(void __user *) (unsigned long)
cmd.cmd.slave_mask_addr);
break;
+ case IPATH_CMD_PIOAVAILUPD:
+ ret = ipath_force_pio_avail_update(pd->port_dd);
+ break;
}
if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 5b40a846ff9..ed55979bfd3 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -451,12 +451,18 @@ bail:
return ret;
}
-static void remove_file(struct dentry *parent, char *name)
+static int remove_file(struct dentry *parent, char *name)
{
struct dentry *tmp;
+ int ret;
tmp = lookup_one_len(name, parent, strlen(name));
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ goto bail;
+ }
+
spin_lock(&dcache_lock);
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
@@ -469,6 +475,14 @@ static void remove_file(struct dentry *parent, char *name)
spin_unlock(&tmp->d_lock);
spin_unlock(&dcache_lock);
}
+
+ ret = 0;
+bail:
+ /*
+ * We don't expect clients to care about the return value, but
+ * it's there if they need it.
+ */
+ return ret;
}
static int remove_device_files(struct super_block *sb,
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 7468477ba83..4171198fc20 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -43,6 +43,9 @@
#include "ipath_kernel.h"
#include "ipath_registers.h"
+static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
+
+
/*
* This lists the InfiniPath registers, in the actual chip layout.
* This structure should never be directly accessed.
@@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = {
.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
/*
- * These should not be used directly via ipath_read_kreg64(),
- * use them with ipath_read_kreg64_port(),
+ * These should not be used directly via ipath_write_kreg64(),
+ * use them with ipath_write_kreg64_port(),
*/
.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
@@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
+
+/* TID entries (memory), HT-only */
+#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
+#define INFINIPATH_RT_VALID 0x8000000000000000ULL
+#define INFINIPATH_RT_ADDR_SHIFT 0
+#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
+#define INFINIPATH_RT_BUFSIZE_SHIFT 48
+
/*
* masks and bits that are different in different chips, or present only
* in one
@@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
+ << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
+
+static int ipath_ht_txe_recover(struct ipath_devdata *);
+
/**
* ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
@@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
/*
* make sure we get this much out, unless told to be quiet,
+ * it's a parity error we may recover from,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~(dd->ipath_lasthwerror |
- ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
- (ipath_debug & __IPATH_VERBDBG))
+ if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+ RXE_EAGER_PARITY)) ||
+ (ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
(hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
- if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
/*
* parity errors in send memory are recoverable,
* just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
- if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- ipath_stats.sps_txeparity++;
- ipath_dbg("Recovering from TXE parity error (%llu), "
- "hwerrstatus=%llx\n",
- (unsigned long long) ipath_stats.sps_txeparity,
- (unsigned long long) hwerrs);
- ipath_disarm_senderrbufs(dd);
- hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) { /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- return;
- }
- }
- if (hwerrs) {
- /*
- * if any set that we aren't ignoring; only
- * make the complaint once, in case it's stuck
- * or recurring, and we get here multiple
- * times.
- */
- if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Hardware Error (freeze "
- "mode), no longer usable, SN %.16s\n",
- dd->ipath_serial);
- isfatal = 1;
- }
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- /* mark as having had error */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- /*
- * mark as not usable, at a minimum until driver
- * is reloaded, probably until reboot, since no
- * other reset is possible.
- */
- dd->ipath_flags &= ~IPATH_INITTED;
- } else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
+ if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
+ hwerrs &= ~TXE_PIO_PARITY;
+ if (hwerrs & RXE_EAGER_PARITY)
+ ipath_dev_err(dd, "RXE parity, Eager TID error is not "
+ "recoverable\n");
+ if (!hwerrs) {
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
ctrl);
@@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (hwerrs) {
+ /*
+ * if any set that we aren't ignoring; only
+ * make the complaint once, in case it's stuck
+ * or recurring, and we get here multiple
+ * times.
+ * force link down, so switch knows, and
+ * LEDs are turned off
+ */
+ if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_ht_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
+ isfatal = 1;
+ }
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ /* mark as having had error */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ /*
+ * mark as not usable, at a minimum until driver
+ * is reloaded, probably until reboot, since no
+ * other reset is possible.
+ */
+ dd->ipath_flags &= ~IPATH_INITTED;
+ }
+ else
+ *msg = 0; /* recovered from all of them */
+ if (*msg)
+ ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
* for status file; if no trailing brace is copied,
@@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
if (n)
snprintf(name, namelen, "%s", n);
- if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
+ if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
+ dd->ipath_minrev > 3)) {
/*
* This version of the driver only supports Rev 3.2 and 3.3
*/
@@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
+ ipath_dbg("MemBIST corrected\n");
ipath_check_htlink(dd);
@@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
u64 __iomem *tidptr, u32 type,
unsigned long pa)
{
+ if (!dd->ipath_kregbase)
+ return;
+
if (pa != dd->ipath_tidinvalid) {
if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
dev_info(&dd->pcidev->dev,
@@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
pa |= lenvalid | INFINIPATH_RT_VALID;
}
}
- if (dd->ipath_kregbase)
- writeq(pa, tidptr);
+ writeq(pa, tidptr);
}
+
/**
* ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
* @dd: the infinipath device
@@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
INFINIPATH_S_ABORT);
ipath_get_eeprom_info(dd);
- if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
+ if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
/*
* Later production QHT7040 has same changes as QHT7140, so
@@ -1528,13 +1548,31 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
return 0;
}
+
+static int ipath_ht_txe_recover(struct ipath_devdata *dd)
+{
+ int cnt = ++ipath_stats.sps_txeparity;
+ if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
+ if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
+ ipath_dev_err(dd,
+ "Too many attempts to recover from "
+ "TXE parity, giving up\n");
+ return 0;
+ }
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+ ipath_disarm_senderrbufs(dd, 1);
+ return 1;
+}
+
+
/**
* ipath_init_ht_get_base_info - set chip-specific flags for user code
* @dd: the infinipath device
* @kbase: ipath_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithims.
+ * HyperTransport can affect some user packet algorithms.
*/
static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
{
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index ae8bf9950c6..1b9c3085775 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -43,6 +43,8 @@
#include "ipath_kernel.h"
#include "ipath_registers.h"
+static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
+
/*
* This file contains all the chip-specific register information and
* access functions for the QLogic InfiniPath PCI-Express chip.
@@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = {
.kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
/*
- * These should not be used directly via ipath_read_kreg64(),
- * use them with ipath_read_kreg64_port()
+ * These should not be used directly via ipath_write_kreg64(),
+ * use them with ipath_write_kreg64_port(),
*/
.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
@@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+
+static int ipath_pe_txe_recover(struct ipath_devdata *);
+
/**
* ipath_pe_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
@@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
- if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- ipath_stats.sps_txeparity++;
- ipath_dbg("Recovering from TXE parity error (%llu), "
- "hwerrstatus=%llx\n",
- (unsigned long long) ipath_stats.sps_txeparity,
- (unsigned long long) hwerrs);
- ipath_disarm_senderrbufs(dd);
- hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) { /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- return;
- }
- }
+ if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
+ hwerrs &= ~TXE_PIO_PARITY;
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
* complaint once, in case it's stuck or recurring,
* and we get here multiple times
+ * Force link down, so switch knows, and
+ * LEDs are turned off
*/
if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_pe_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
ipath_dev_err(dd, "Fatal Hardware Error (freeze "
"mode), no longer usable, SN %.16s\n",
dd->ipath_serial);
@@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
- ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (*msg)
+ ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
/*
* for /sys status file ; if no trailing } is copied, we'll
@@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
+ ipath_dbg("MemBIST corrected\n");
val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
@@ -1293,7 +1293,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
* @kbase: ipath_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithims.
+ * HyperTransport can affect some user packet algorithms.
*/
static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
{
@@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
dd->ipath_irq = 0;
}
+/*
+ * On platforms using this chip, and not having ordered WC stores, we
+ * can get TXE parity errors due to speculative reads to the PIO buffers,
+ * and this, due to a chip bug can result in (many) false parity error
+ * reports. So it's a debug print on those, and an info print on systems
+ * where the speculative reads don't occur.
+ * Because we can get lots of false errors, we have no upper limit
+ * on recovery attempts on those platforms.
+ */
+static int ipath_pe_txe_recover(struct ipath_devdata *dd)
+{
+ if (ipath_unordered_wc())
+ ipath_dbg("Recovering from TXE PIO parity error\n");
+ else {
+ int cnt = ++ipath_stats.sps_txeparity;
+ if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
+ if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
+ ipath_dev_err(dd,
+ "Too many attempts to recover from "
+ "TXE parity, giving up\n");
+ return 0;
+ }
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+ }
+ ipath_disarm_senderrbufs(dd, 1);
+ return 1;
+}
+
/**
* ipath_init_iba6120_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index d4f6b5239ef..7045ba68949 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd)
return ret;
}
+static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
+{
+ struct ipath_portdata *pd = NULL;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (pd) {
+ pd->port_dd = dd;
+ pd->port_cnt = 1;
+ /* The port 0 pkey table is used by the layer interface. */
+ pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+ }
+ return pd;
+}
+
static int init_chip_first(struct ipath_devdata *dd,
struct ipath_portdata **pdp)
{
@@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd,
goto done;
}
- dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL);
+ pd = create_portdata0(dd);
- if (!dd->ipath_pd[0]) {
+ if (!pd) {
ipath_dev_err(dd, "Unable to allocate portdata for port "
"0, failing\n");
ret = -ENOMEM;
goto done;
}
- pd = dd->ipath_pd[0];
- pd->port_dd = dd;
- pd->port_port = 0;
- pd->port_cnt = 1;
- /* The port 0 pkey table is used by the layer interface. */
- pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+ dd->ipath_pd[0] = pd;
+
dd->ipath_rcvtidcnt =
ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
dd->ipath_rcvtidbase =
@@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd,
goto done;
}
+
+ /* clear diagctrl register, in case diags were running and crashed */
+ ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
+
/* clear the initial reset flag, in case first driver load */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
INFINIPATH_E_RESET);
@@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
{
int ret = 0, i;
u32 val32, kpiobufs;
+ u32 piobufs, uports;
u64 val;
struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
@@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
* the in memory DMA'ed copies of the registers. This has to
* be done early, before we calculate lastport, etc.
*/
- val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
/*
* calc number of pioavail registers, and save it; we have 2
* bits per buffer.
*/
- dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
+ dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
/ (sizeof(u64) * BITS_PER_BYTE / 2);
+ uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
if (ipath_kpiobufs == 0) {
/* not set by user (this is default) */
- if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
+ if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
kpiobufs = 32;
else
kpiobufs = 16;
@@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
else
kpiobufs = ipath_kpiobufs;
- if (kpiobufs >
- (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
- (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
- i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
- (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
+ if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
+ i = (int) piobufs -
+ (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
if (i < 0)
i = 0;
- dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for "
- "kernel leaves too few for %d user ports "
+ dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
+ "%d for kernel leaves too few for %d user ports "
"(%d each); using %u\n", kpiobufs,
- dd->ipath_cfgports - 1,
- IPATH_MIN_USER_PORT_BUFCNT, i);
+ piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
/*
* shouldn't change ipath_kpiobufs, because could be
* different for different devices...
*/
kpiobufs = i;
}
- dd->ipath_lastport_piobuf =
- dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs;
- dd->ipath_pbufsport = dd->ipath_cfgports > 1
- ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1)
- : 0;
- val32 = dd->ipath_lastport_piobuf -
- (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
+ dd->ipath_lastport_piobuf = piobufs - kpiobufs;
+ dd->ipath_pbufsport =
+ uports ? dd->ipath_lastport_piobuf / uports : 0;
+ val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
if (val32 > 0) {
ipath_dbg("allocating %u pbufs/port leaves %u unused, "
"add to kernel\n", dd->ipath_pbufsport, val32);
@@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
"each for %u user ports\n", kpiobufs,
- dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
- dd->ipath_pbufsport, dd->ipath_cfgports - 1);
+ piobufs, dd->ipath_pbufsport, uports);
dd->ipath_f_early_init(dd);
@@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
* Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
* existing, and re-allocate.
+ * Need to re-create rest of port 0 portdata as well.
*/
if (reinit) {
- struct ipath_portdata *pd = dd->ipath_pd[0];
- dd->ipath_pd[0] = NULL;
- ipath_free_pddata(dd, pd);
+ /* Alloc and init new ipath_portdata for port0,
+ * Then free old pd. Could lead to fragmentation, but also
+ * makes later support for hot-swap easier.
+ */
+ struct ipath_portdata *npd;
+ npd = create_portdata0(dd);
+ if (npd) {
+ ipath_free_pddata(dd, pd);
+ dd->ipath_pd[0] = pd = npd;
+ } else {
+ ipath_dev_err(dd, "Unable to allocate portdata for"
+ " port 0, failing\n");
+ ret = -ENOMEM;
+ goto done;
+ }
}
dd->ipath_f_tidtemplate(dd);
ret = ipath_create_rcvhdrq(dd, pd);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 72b9e279d19..45d033169c6 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,10 +38,39 @@
#include "ipath_common.h"
/*
+ * clear (write) a pio buffer, to clear a parity error. This routine
+ * should only be called when in freeze mode, and the buffer should be
+ * canceled afterwards.
+ */
+static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
+{
+ u32 __iomem *pbuf;
+ u32 dwcnt; /* dword count to write */
+ if (pnum < dd->ipath_piobcnt2k) {
+ pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
+ dd->ipath_palign);
+ dwcnt = dd->ipath_piosize2k >> 2;
+ }
+ else {
+ pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
+ (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+ dwcnt = dd->ipath_piosize4k >> 2;
+ }
+ dev_info(&dd->pcidev->dev,
+ "Rewrite PIO buffer %u, to recover from parity error\n",
+ pnum);
+ *pbuf = dwcnt+1; /* no flush required, since already in freeze */
+ while(--dwcnt)
+ *pbuf++ = 0;
+}
+
+/*
* Called when we might have an error that is specific to a particular
* PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ * If rewrite is true, and bits are set in the sendbufferror registers,
+ * we'll write to the buffer, for error recovery on parity errors.
*/
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
{
u32 piobcnt;
unsigned long sbuf[4];
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
}
for (i = 0; i < piobcnt; i++)
- if (test_bit(i, sbuf))
+ if (test_bit(i, sbuf)) {
+ if (rewrite)
+ ipath_clrpiobuf(dd, i);
ipath_disarm_piobufs(dd, i, 1);
+ }
dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
}
}
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
u64 ignore_this_time = 0;
- ipath_disarm_senderrbufs(dd);
+ ipath_disarm_senderrbufs(dd, 0);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
@@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
* happens so often we never want to count it.
*/
if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
- ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
- ~INFINIPATH_E_IBSTATUSCHANGED);
+ int iserr;
+ iserr = ipath_decode_err(msg, sizeof msg,
+ dd->ipath_lasterror &
+ ~INFINIPATH_E_IBSTATUSCHANGED);
if (dd->ipath_lasterror &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+ ~(INFINIPATH_E_RRCVEGRFULL |
+ INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
ipath_dev_err(dd, "Suppressed %u messages for "
"fast-repeating errors (%s) (%llx)\n",
supp_msgs, msg,
@@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
* them. So only complain about these at debug
* level.
*/
- ipath_dbg("Suppressed %u messages for %s\n",
- supp_msgs, msg);
+ if (iserr)
+ ipath_dbg("Suppressed %u messages for %s\n",
+ supp_msgs, msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Suppressed %u messages for %s\n",
+ supp_msgs, msg);
}
}
}
@@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
char msg[512];
u64 ignore_this_time = 0;
- int i;
+ int i, iserr = 0;
int chkerrpkts = 0, noprint = 0;
unsigned supp_msgs;
@@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
}
if (supp_msgs == 250000) {
+ int s_iserr;
/*
* It's not entirely reasonable assuming that the errors set
* in the last clear period are all responsible for the
@@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
- ipath_decode_err(msg, sizeof msg,
+ s_iserr = ipath_decode_err(msg, sizeof msg,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
- ipath_dev_err(dd, "Disabling error(s) %llx because "
- "occurring too frequently (%s)\n",
- (unsigned long long)
- (dd->ipath_maskederrs &
- ~dd->ipath_ignorederrs), msg);
+ ~(INFINIPATH_E_RRCVEGRFULL |
+ INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+ ipath_dev_err(dd, "Temporarily disabling "
+ "error(s) %llx reporting; too frequent (%s)\n",
+ (unsigned long long) (dd->ipath_maskederrs &
+ ~dd->ipath_ignorederrs), msg);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* processing them. So only complain about
* these at debug level.
*/
- ipath_dbg("Disabling frequent queue full errors "
- "(%s)\n", msg);
+ if (s_iserr)
+ ipath_dbg("Temporarily disabling reporting "
+ "too frequent queue full errors (%s)\n",
+ msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Temporarily disabling reporting too"
+ " frequent packet errors (%s)\n",
+ msg);
}
/*
@@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
ipath_stats.sps_crcerrs++;
chkerrpkts = 1;
}
+ iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
+
/*
* We don't want to print these two as they happen, or we can make
@@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
}
- if (!noprint && *msg)
- ipath_dev_err(dd, "%s error\n", msg);
+ if (!noprint && *msg) {
+ if (iserr)
+ ipath_dev_err(dd, "%s error\n", msg);
+ else
+ dev_info(&dd->pcidev->dev, "%s packet problems\n",
+ msg);
+ }
if (dd->ipath_state_wanted & dd->ipath_flags) {
ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
"waking\n", dd->ipath_state_wanted,
@@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
struct ipath_portdata *pd = dd->ipath_pd[i];
if (portr & (1 << i) && pd && pd->port_cnt &&
test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
- int rcbit;
clear_bit(IPATH_PORT_WAITING_RCV,
&pd->port_flag);
- rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
- clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
+ clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+ &dd->ipath_rcvctrl);
wake_up_interruptible(&pd->port_wait);
rcvdint = 1;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 6d8d05fb599..e900c2593f4 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
-void ipath_disarm_senderrbufs(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
extern int ipath_diag_inuse;
irqreturn_t ipath_intr(int irq, void *devid);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
#if __IPATH_INFO || __IPATH_DBG
extern const char *ipath_ibcstatus_str[];
#endif
@@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
#define IPATH_PORT_WAITING_RCV 2
/* waiting for a PIO buffer to be available */
#define IPATH_PORT_WAITING_PIO 3
+ /* master has not finished initializing */
+#define IPATH_PORT_MASTER_UNINIT 4
/* free up any allocated data at closes */
void ipath_free_data(struct ipath_portdata *dd);
@@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *);
void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
/*
* number of words used for protocol header if not set by ipath_userinit();
@@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
/* these are used for the registers that vary with port */
void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
unsigned, u64);
-u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
- unsigned);
/*
* We could have a single register get/put routine, that takes a group type,
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
extern unsigned ipath_debug; /* debugging bit mask */
+#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
+
const char *ipath_get_unit_name(int unit);
extern struct mutex ipath_mutex;
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 851763d7d2d..dd487c100f5 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
r = (r + 1) & (rkt->max - 1);
if (r == n) {
spin_unlock_irqrestore(&rkt->lock, flags);
- ipath_dbg(KERN_INFO "LKEY table full\n");
+ ipath_dbg("LKEY table full\n");
ret = 0;
goto bail;
}
@@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
* being reversible by calling bus_to_virt().
*/
if (sge->lkey == 0) {
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
isge->mr = NULL;
isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
@@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
* (see ipath_get_dma_mr and ipath_dma.c).
*/
if (rkey == 0) {
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
sge->mr = NULL;
sge->vaddr = (void *) vaddr;
sge->length = len;
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 8cc8598d6c6..31e70732e36 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
m = 0;
n = 0;
list_for_each_entry(chunk, &region->chunk_list, list) {
- for (i = 0; i < chunk->nmap; i++) {
- mr->mr.map[m]->segs[n].vaddr =
- page_address(chunk->page_list[i].page);
+ for (i = 0; i < chunk->nents; i++) {
+ void *vaddr;
+
+ vaddr = page_address(chunk->page_list[i].page);
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = region->page_size;
n++;
if (n == IPATH_SEGSZ) {
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 64f07b19349..16db9ac0b40 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -81,11 +81,51 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
-static u32 alloc_qpn(struct ipath_qp_table *qpt)
+
+static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
+{
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long flags;
+
+ /*
+ * Free the page if someone raced with us installing it.
+ */
+
+ spin_lock_irqsave(&qpt->lock, flags);
+ if (map->page)
+ free_page(page);
+ else
+ map->page = (void *)page;
+ spin_unlock_irqrestore(&qpt->lock, flags);
+}
+
+
+static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
- u32 ret;
+ u32 ret = -1;
+
+ if (type == IB_QPT_SMI)
+ ret = 0;
+ else if (type == IB_QPT_GSI)
+ ret = 1;
+
+ if (ret != -1) {
+ map = &qpt->map[0];
+ if (unlikely(!map->page)) {
+ get_map_page(qpt, map);
+ if (unlikely(!map->page)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ }
+ if (!test_and_set_bit(ret, map->page))
+ atomic_dec(&map->n_free);
+ else
+ ret = -EBUSY;
+ goto bail;
+ }
qpn = qpt->last + 1;
if (qpn >= QPN_MAX)
@@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- unsigned long page = get_zeroed_page(GFP_KERNEL);
- unsigned long flags;
-
- /*
- * Free the page if someone raced with us
- * installing it:
- */
- spin_lock_irqsave(&qpt->lock, flags);
- if (map->page)
- free_page(page);
- else
- map->page = (void *)page;
- spin_unlock_irqrestore(&qpt->lock, flags);
+ get_map_page(qpt, map);
if (unlikely(!map->page))
break;
}
@@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt)
qpn = mk_qpn(qpt, map, offset);
}
- ret = 0;
+ ret = -ENOMEM;
bail:
return ret;
@@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
enum ib_qp_type type)
{
unsigned long flags;
- u32 qpn;
int ret;
- if (type == IB_QPT_SMI)
- qpn = 0;
- else if (type == IB_QPT_GSI)
- qpn = 1;
- else {
- /* Allocate the next available QPN */
- qpn = alloc_qpn(qpt);
- if (qpn == 0) {
- ret = -ENOMEM;
- goto bail;
- }
- }
- qp->ibqp.qp_num = qpn;
+ ret = alloc_qpn(qpt, type);
+ if (ret < 0)
+ goto bail;
+ qp->ibqp.qp_num = ret;
/* Add the QP to the hash table. */
spin_lock_irqsave(&qpt->lock, flags);
- qpn %= qpt->max;
- qp->next = qpt->table[qpn];
- qpt->table[qpn] = qp;
+ ret %= qpt->max;
+ qp->next = qpt->table[ret];
+ qpt->table[ret] = qp;
atomic_inc(&qp->refcount);
spin_unlock_irqrestore(&qpt->lock, flags);
@@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
if (!fnd)
return;
- /* If QPN is not reserved, mark QPN free in the bitmap. */
- if (qp->ibqp.qp_num > 1)
- free_qpn(qpt, qp->ibqp.qp_num);
+ free_qpn(qpt, qp->ibqp.qp_num);
wait_event(qp->wait, !atomic_read(&qp->refcount));
}
@@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
while (qp) {
nqp = qp->next;
- if (qp->ibqp.qp_num > 1)
- free_qpn(qpt, qp->ibqp.qp_num);
+ free_qpn(qpt, qp->ibqp.qp_num);
if (!atomic_dec_and_test(&qp->refcount) ||
!ipath_destroy_qp(&qp->ibqp))
- ipath_dbg(KERN_INFO "QP memory leak!\n");
+ ipath_dbg("QP memory leak!\n");
qp = nqp;
}
}
@@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ qp->s_busy = 0;
+ qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_psn = 0;
qp->r_psn = 0;
@@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->r_state = IB_OPCODE_UC_SEND_LAST;
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
- qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
qp->r_wrid_valid = 0;
qp->s_rnr_timeout = 0;
@@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_ssn = 1;
qp->s_lsn = 0;
qp->s_wait_credit = 0;
+ memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+ qp->r_head_ack_queue = 0;
+ qp->s_tail_ack_queue = 0;
+ qp->s_num_rd_atomic = 0;
if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
@@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
* @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
- * QP s_lock should be held and interrupts disabled.
+ * The QP s_lock should be held and interrupts disabled.
*/
void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
- ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
+ ipath_dbg("QP%d/%d in error state\n",
qp->ibqp.qp_num, qp->remote_qpn);
spin_lock(&dev->pending_lock);
@@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
wc.port_num = 0;
if (qp->r_wrid_valid) {
qp->r_wrid_valid = 0;
+ wc.wr_id = qp->r_wr_id;
+ wc.opcode = IB_WC_RECV;
wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
@@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->path_mig_state != IB_MIG_REARM)
goto inval;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
+ goto inval;
+
switch (new_state) {
case IB_QPS_RESET:
ipath_reset_qp(qp);
break;
case IB_QPS_ERR:
- ipath_error_qp(qp, IB_WC_GENERAL_ERR);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
@@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_QKEY)
qp->qkey = attr->qkey;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ qp->s_max_rd_atomic = attr->max_rd_atomic;
+
qp->state = new_state;
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
attr->sq_draining = 0;
- attr->max_rd_atomic = 1;
- attr->max_dest_rd_atomic = 1;
+ attr->max_rd_atomic = qp->s_max_rd_atomic;
+ attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer;
attr->port_num = 1;
attr->timeout = qp->timeout;
@@ -614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->recv_cq = qp->ibqp.recv_cq;
init_attr->srq = qp->ibqp.srq;
init_attr->cap = attr->cap;
- if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
+ if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
else
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->s_size = init_attr->cap.max_send_wr + 1;
qp->s_max_sge = init_attr->cap.max_send_sge;
if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
- qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
+ qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
else
qp->s_flags = 0;
dev = to_idev(ibpd->device);
@@ -958,7 +989,7 @@ bail:
* @wc: the WC responsible for putting the QP in this state
*
* Flushes the send work queue.
- * The QP s_lock should be held.
+ * The QP s_lock should be held and interrupts disabled.
*/
void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
@@ -966,7 +997,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
- ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
+ ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
qp->ibqp.qp_num, qp->remote_qpn, wc->status);
spin_lock(&dev->pending_lock);
@@ -984,12 +1015,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
wc->status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
+ wqe = get_swqe_ptr(qp, qp->s_last);
wc->wr_id = wqe->wr.wr_id;
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
- wqe = get_swqe_ptr(qp, qp->s_last);
}
qp->s_cur = qp->s_tail = qp->s_head;
qp->state = IB_QPS_SQE;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 5ff20cb0449..b4b88d0b53f 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -37,6 +37,19 @@
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
+static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
+ u32 psn, u32 pmtu)
+{
+ u32 len;
+
+ len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+ ss->sge = wqe->sg_list[0];
+ ss->sg_list = wqe->sg_list + 1;
+ ss->num_sge = wqe->wr.num_sge;
+ ipath_skip_sge(ss, len);
+ return wqe->length - len;
+}
+
/**
* ipath_init_restart- initialize the qp->s_sge after a restart
* @qp: the QP who's SGE we're restarting
@@ -47,15 +60,9 @@
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
struct ipath_ibdev *dev;
- u32 len;
- len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
- ib_mtu_enum_to_int(qp->path_mtu);
- qp->s_sge.sge = wqe->sg_list[0];
- qp->s_sge.sg_list = wqe->sg_list + 1;
- qp->s_sge.num_sge = wqe->wr.num_sge;
- ipath_skip_sge(&qp->s_sge, len);
- qp->s_len = wqe->length - len;
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
+ ib_mtu_enum_to_int(qp->path_mtu));
dev = to_idev(qp->ibqp.device);
spin_lock(&dev->pending_lock);
if (list_empty(&qp->timerwait))
@@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
* @ohdr: a pointer to the IB header being constructed
* @pmtu: the path MTU
*
- * Return bth0 if constructed; otherwise, return 0.
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
* Note the QP s_lock must be held.
*/
-u32 ipath_make_rc_ack(struct ipath_qp *qp,
- struct ipath_other_headers *ohdr,
- u32 pmtu)
+static int ipath_make_rc_ack(struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 pmtu, u32 *bth0p, u32 *bth2p)
{
+ struct ipath_ack_entry *e;
u32 hwords;
u32 len;
u32 bth0;
+ u32 bth2;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
- /*
- * Send a response. Note that we are in the responder's
- * side of the QP context.
- */
switch (qp->s_ack_state) {
- case OP(RDMA_READ_REQUEST):
- qp->s_cur_sge = &qp->s_rdma_sge;
- len = qp->s_rdma_len;
- if (len > pmtu) {
- len = pmtu;
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
- } else
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
- qp->s_rdma_len -= len;
+ case OP(RDMA_READ_RESPONSE_LAST):
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ case OP(ATOMIC_ACKNOWLEDGE):
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ /* FALLTHROUGH */
+ case OP(ACKNOWLEDGE):
+ /* Check for no next entry in the queue. */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+ if (qp->s_flags & IPATH_S_ACK_PENDING)
+ goto normal;
+ goto bail;
+ }
+
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST)) {
+ /* Copy SGE state in case we need to resend */
+ qp->s_ack_rdma_sge = e->rdma_sge;
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ len = e->rdma_sge.sge.sge_length;
+ if (len > pmtu) {
+ len = pmtu;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+ } else {
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+ if (++qp->s_tail_ack_queue >
+ IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ }
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_rdma_psn = e->psn;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+ } else {
+ /* COMPARE_SWAP or FETCH_ADD */
+ qp->s_cur_sge = NULL;
+ len = 0;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ ohdr->u.at.aeth = ipath_compute_aeth(qp);
+ ohdr->u.at.atomic_ack_eth[0] =
+ cpu_to_be32(e->atomic_data >> 32);
+ ohdr->u.at.atomic_ack_eth[1] =
+ cpu_to_be32(e->atomic_data);
+ hwords += sizeof(ohdr->u.at) / sizeof(u32);
+ bth2 = e->psn;
+ if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ }
bth0 = qp->s_ack_state << 24;
- ohdr->u.aeth = ipath_compute_aeth(qp);
- hwords++;
break;
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
- qp->s_cur_sge = &qp->s_rdma_sge;
- len = qp->s_rdma_len;
+ len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu)
len = pmtu;
else {
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
}
- qp->s_rdma_len -= len;
bth0 = qp->s_ack_state << 24;
- break;
-
- case OP(RDMA_READ_RESPONSE_LAST):
- case OP(RDMA_READ_RESPONSE_ONLY):
- /*
- * We have to prevent new requests from changing
- * the r_sge state while a ipath_verbs_send()
- * is in progress.
- */
- qp->s_ack_state = OP(ACKNOWLEDGE);
- bth0 = 0;
- goto bail;
-
- case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
- qp->s_cur_sge = NULL;
- len = 0;
- /*
- * Set the s_ack_state so the receive interrupt handler
- * won't try to send an ACK (out of order) until this one
- * is actually sent.
- */
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
- bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
- ohdr->u.at.aeth = ipath_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
- hwords += sizeof(ohdr->u.at) / 4;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
break;
default:
- /* Send a regular ACK. */
- qp->s_cur_sge = NULL;
- len = 0;
+ normal:
/*
- * Set the s_ack_state so the receive interrupt handler
- * won't try to send an ACK (out of order) until this one
- * is actually sent.
+ * Send a regular ACK.
+ * Set the s_ack_state so we wait until after sending
+ * the ACK before setting s_ack_state to ACKNOWLEDGE
+ * (see above).
*/
- qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
- bth0 = OP(ACKNOWLEDGE) << 24;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ qp->s_flags &= ~IPATH_S_ACK_PENDING;
+ qp->s_cur_sge = NULL;
if (qp->s_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
- (qp->s_nak_state <<
- IPATH_AETH_CREDIT_SHIFT));
+ ohdr->u.aeth =
+ cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+ (qp->s_nak_state <<
+ IPATH_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
+ len = 0;
+ bth0 = OP(ACKNOWLEDGE) << 24;
+ bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
}
qp->s_hdrwords = hwords;
qp->s_cur_size = len;
+ *bth0p = bth0;
+ *bth2p = bth2;
+ return 1;
bail:
- return bth0;
+ return 0;
}
/**
@@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
u32 bth2;
char newreq;
+ /* Sending responses has higher priority over sending requests. */
+ if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+ (qp->s_flags & IPATH_S_ACK_PENDING) ||
+ qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
+ ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
+ goto done;
+
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
qp->s_rnr_timeout)
- goto done;
+ goto bail;
/* Limit the number of packets sent without an ACK. */
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
@@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
list_add_tail(&qp->timerwait,
&dev->pending[dev->pending_index]);
spin_unlock(&dev->pending_lock);
- goto done;
+ goto bail;
}
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp,
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
- goto done;
+ goto bail;
+ /*
+ * If a fence is requested, wait for previous
+ * RDMA read and atomic operations to finish.
+ */
+ if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+ qp->s_num_rd_atomic) {
+ qp->s_flags |= IPATH_S_FENCE_PENDING;
+ goto bail;
+ }
wqe->psn = qp->s_next_psn;
newreq = 1;
}
@@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
- goto done;
+ goto bail;
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
- goto done;
+ goto bail;
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
- hwords += sizeof(struct ib_reth) / 4;
+ hwords += sizeof(struct ib_reth) / sizeof(u32);
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp,
break;
case IB_WR_RDMA_READ:
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
- ohdr->u.rc.reth.rkey =
- cpu_to_be32(wqe->wr.wr.rdma.rkey);
- ohdr->u.rc.reth.length = cpu_to_be32(len);
- qp->s_state = OP(RDMA_READ_REQUEST);
- hwords += sizeof(ohdr->u.rc.reth) / 4;
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
if (qp->s_lsn != (u32) -1)
qp->s_lsn++;
/*
@@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_next_psn += (len - 1) / pmtu;
wqe->lpsn = qp->s_next_psn++;
}
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
ss = NULL;
len = 0;
if (++qp->s_cur == qp->s_size)
@@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp,
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
- qp->s_state = OP(COMPARE_SWAP);
- else
- qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.vaddr = cpu_to_be64(
- wqe->wr.wr.atomic.remote_addr);
- ohdr->u.atomic_eth.rkey = cpu_to_be32(
- wqe->wr.wr.atomic.rkey);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->wr.wr.atomic.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->wr.wr.atomic.compare_add);
- hwords += sizeof(struct ib_atomic_eth) / 4;
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
if (qp->s_lsn != (u32) -1)
qp->s_lsn++;
wqe->lpsn = wqe->psn;
}
- if (++qp->s_cur == qp->s_size)
- qp->s_cur = 0;
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ qp->s_state = OP(COMPARE_SWAP);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.swap);
+ ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ } else {
+ qp->s_state = OP(FETCH_ADD);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ ohdr->u.atomic_eth.compare_data = 0;
+ }
+ ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr >> 32);
+ ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr);
+ ohdr->u.atomic_eth.rkey = cpu_to_be32(
+ wqe->wr.wr.atomic.rkey);
+ hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
ss = NULL;
len = 0;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
break;
default:
- goto done;
+ goto bail;
}
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
@@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_psn = wqe->lpsn + 1;
else {
qp->s_psn++;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
}
/*
@@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
@@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = &qp->s_sge;
len = qp->s_len;
@@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp,
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
qp->s_state = OP(RDMA_READ_REQUEST);
- hwords += sizeof(ohdr->u.rc.reth) / 4;
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if ((int)(qp->s_psn - qp->s_next_psn) > 0)
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
qp->s_next_psn = qp->s_psn;
ss = NULL;
len = 0;
@@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
if (qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
-
- case OP(RDMA_READ_REQUEST):
- case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
- /*
- * We shouldn't start anything new until this request is
- * finished. The ACK will handle rescheduling us. XXX The
- * number of outstanding ones is negotiated at connection
- * setup time (see pg. 258,289)? XXX Also, if we support
- * multiple outstanding requests, we need to check the WQE
- * IB_SEND_FENCE flag and not send a new request if a RDMA
- * read or atomic is pending.
- */
- goto done;
}
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
bth2 |= 1 << 31; /* Request ACK. */
@@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_cur_size = len;
*bth0p = bth0 | (qp->s_state << 24);
*bth2p = bth2;
+done:
return 1;
-done:
+bail:
return 0;
}
@@ -524,7 +576,8 @@ done:
*
* This is called from ipath_rc_rcv() and only uses the receive
* side QP state.
- * Note that RDMA reads are handled in the send side QP state and tasklet.
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
*/
static void send_rc_ack(struct ipath_qp *qp)
{
@@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp)
struct ipath_ib_header hdr;
struct ipath_other_headers *ohdr;
+ /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+ if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
+ goto queue_ack;
+
/* Construct the header. */
ohdr = &hdr.u.oth;
lrh0 = IPATH_LRH_BTH;
@@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp)
lrh0 = IPATH_LRH_GRH;
}
/* read pkey_index w/o lock (its atomic) */
- bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
+ OP(ACKNOWLEDGE) << 24;
if (qp->r_nak_state)
ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
(qp->r_nak_state <<
IPATH_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = ipath_compute_aeth(qp);
- if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
- bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
- ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
- hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
- } else
- bth0 |= OP(ACKNOWLEDGE) << 24;
lrh0 |= qp->remote_ah_attr.sl << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
@@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp)
* If we can send the ACK, clear the ACK state.
*/
if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
- qp->r_ack_state = OP(ACKNOWLEDGE);
dev->n_unicast_xmit++;
- } else {
- /*
- * We are out of PIO buffers at the moment.
- * Pass responsibility for sending the ACK to the
- * send tasklet so that when a PIO buffer becomes
- * available, the ACK is sent ahead of other outgoing
- * packets.
- */
- dev->n_rc_qacks++;
- spin_lock_irq(&qp->s_lock);
- /* Don't coalesce if a RDMA read or atomic is pending. */
- if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
- qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
- qp->s_ack_state = qp->r_ack_state;
- qp->s_nak_state = qp->r_nak_state;
- qp->s_ack_psn = qp->r_ack_psn;
- qp->r_ack_state = OP(ACKNOWLEDGE);
- }
- spin_unlock_irq(&qp->s_lock);
-
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ goto done;
}
+
+ /*
+ * We are out of PIO buffers at the moment.
+ * Pass responsibility for sending the ACK to the
+ * send tasklet so that when a PIO buffer becomes
+ * available, the ACK is sent ahead of other outgoing
+ * packets.
+ */
+ dev->n_rc_qacks++;
+
+queue_ack:
+ spin_lock_irq(&qp->s_lock);
+ qp->s_flags |= IPATH_S_ACK_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+ spin_unlock_irq(&qp->s_lock);
+
+ /* Call ipath_do_rc_send() in another thread. */
+ tasklet_hi_schedule(&qp->s_task);
+
+done:
+ return;
}
/**
@@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
if (wqe->wr.opcode == IB_WR_RDMA_READ)
dev->n_rc_resends++;
else
- dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+ dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
tasklet_hi_schedule(&qp->s_task);
@@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
list_del_init(&qp->timerwait);
spin_unlock(&dev->pending_lock);
- /* Nothing is pending to ACK/NAK. */
- if (unlikely(qp->s_last == qp->s_tail))
- goto bail;
-
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
*/
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
(opcode != OP(RDMA_READ_RESPONSE_LAST) ||
- ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+ ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
(opcode != OP(ATOMIC_ACKNOWLEDGE) ||
@@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
*/
goto bail;
}
- if (wqe->wr.opcode == IB_WR_RDMA_READ ||
- wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- tasklet_hi_schedule(&qp->s_task);
+ if (qp->s_num_rd_atomic &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+ qp->s_num_rd_atomic--;
+ /* Restart sending task if fence is complete */
+ if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+ !qp->s_num_rd_atomic) {
+ qp->s_flags &= ~IPATH_S_FENCE_PENDING;
+ tasklet_hi_schedule(&qp->s_task);
+ } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
+ qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
+ tasklet_hi_schedule(&qp->s_task);
+ }
+ }
/* Post a send completion queue entry if requested. */
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length;
+ wc.imm_data = 0;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
+ wc.wc_flags = 0;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
@@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
if (qp->s_last == qp->s_cur) {
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
+ qp->s_last = qp->s_cur;
+ if (qp->s_last == qp->s_tail)
+ break;
wqe = get_swqe_ptr(qp, qp->s_cur);
qp->s_state = OP(SEND_LAST);
qp->s_psn = wqe->psn;
+ } else {
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ if (qp->s_last == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, qp->s_last);
}
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- wqe = get_swqe_ptr(qp, qp->s_last);
- if (qp->s_last == qp->s_tail)
- break;
}
switch (aeth >> 29) {
@@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
list_add_tail(&qp->timerwait,
&dev->pending[dev->pending_index]);
spin_unlock(&dev->pending_lock);
+ /*
+ * If we get a partial ACK for a resent operation,
+ * we can stop resending the earlier packets and
+ * continue with the next packet the receiver wants.
+ */
+ if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ reset_psn(qp, psn + 1);
+ tasklet_hi_schedule(&qp->s_task);
+ }
+ } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
}
ipath_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
@@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
case 1: /* RNR NAK */
dev->n_rnr_naks++;
+ if (qp->s_last == qp->s_tail)
+ goto bail;
if (qp->s_rnr_retry == 0) {
- if (qp->s_last == qp->s_tail)
- goto bail;
-
wc.status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
}
if (qp->s_rnr_retry_cnt < 7)
qp->s_rnr_retry--;
- if (qp->s_last == qp->s_tail)
- goto bail;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- dev->n_rc_resends += (int)qp->s_psn - (int)psn;
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ dev->n_rc_resends++;
+ else
+ dev->n_rc_resends +=
+ (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
@@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
goto bail;
case 3: /* NAK */
- /* The last valid PSN seen is the previous request's. */
- if (qp->s_last != qp->s_tail)
- update_last_psn(qp, wqe->psn - 1);
+ if (qp->s_last == qp->s_tail)
+ goto bail;
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
dev->n_seq_naks++;
/*
- * Back up to the responder's expected PSN. XXX
+ * Back up to the responder's expected PSN.
* Note that we might get a NAK in the middle of an
* RDMA READ response which terminates the RDMA
* READ.
*/
- if (qp->s_last == qp->s_tail)
- break;
-
- if (ipath_cmp24(psn, wqe->psn) < 0)
- break;
-
- /* Retry the request. */
ipath_restart_rc(qp, psn, &wc);
break;
@@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
u32 psn, u32 hdrsize, u32 pmtu,
int header_in_data)
{
+ struct ipath_swqe *wqe;
unsigned long flags;
struct ib_wc wc;
int diff;
@@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto ack_done;
}
+ if (unlikely(qp->s_last == qp->s_tail))
+ goto ack_done;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+
switch (opcode) {
case OP(ACKNOWLEDGE):
case OP(ATOMIC_ACKNOWLEDGE):
@@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
aeth = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
}
- if (opcode == OP(ATOMIC_ACKNOWLEDGE))
- *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+ u64 val;
+
+ if (!header_in_data) {
+ __be32 *p = ohdr->u.at.atomic_ack_eth;
+
+ val = ((u64) be32_to_cpu(p[0]) << 32) |
+ be32_to_cpu(p[1]);
+ } else
+ val = be64_to_cpu(((__be64 *) data)[0]);
+ *(u64 *) wqe->sg_list[0].vaddr = val;
+ }
if (!do_rc_ack(qp, aeth, psn, opcode) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
hdrsize += 4;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
/*
- * do_rc_ack() has already checked the PSN so skip
- * the sequence check.
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
*/
- goto rdma_read;
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_middle;
case OP(RDMA_READ_RESPONSE_MIDDLE):
/* no AETH, no ACK */
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- if (qp->s_last != qp->s_tail)
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
goto ack_done;
}
- rdma_read:
- if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
- goto ack_done;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ read_middle:
if (unlikely(tlen != (hdrsize + pmtu + 4)))
- goto ack_done;
- if (unlikely(pmtu >= qp->s_len))
- goto ack_done;
+ goto ack_len_err;
+ if (unlikely(pmtu >= qp->s_rdma_read_len))
+ goto ack_len_err;
+
/* We got a response so update the timeout. */
- if (unlikely(qp->s_last == qp->s_tail ||
- get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
- IB_WR_RDMA_READ))
- goto ack_done;
spin_lock(&dev->pending_lock);
if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
list_move_tail(&qp->timerwait,
@@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
/*
* Update the RDMA receive state but do the copy w/o
* holding the locks and blocking interrupts.
- * XXX Yet another place that affects relaxed RDMA order
- * since we don't want s_sge modified.
*/
- qp->s_len -= pmtu;
+ qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- ipath_copy_sge(&qp->s_sge, data, pmtu);
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
goto bail;
- case OP(RDMA_READ_RESPONSE_LAST):
- /* ACKs READ req. */
+ case OP(RDMA_READ_RESPONSE_ONLY):
if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
dev->n_rdma_seq++;
- if (qp->s_last != qp->s_tail)
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
goto ack_done;
}
- /* FALLTHROUGH */
- case OP(RDMA_READ_RESPONSE_ONLY):
- if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
- goto ack_done;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 0 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen < (hdrsize + pad + 8)))
+ goto ack_len_err;
/*
- * Get the number of bytes the message was padded by.
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
*/
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_last;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /* ACKs READ req. */
+ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
+ dev->n_rdma_seq++;
+ ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ goto ack_done;
+ }
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for the AETH header (4) and
* ICRC (4).
*/
- if (unlikely(tlen <= (hdrsize + pad + 8))) {
- /* XXX Need to generate an error CQ entry. */
- goto ack_done;
- }
+ if (unlikely(tlen <= (hdrsize + pad + 8)))
+ goto ack_len_err;
+ read_last:
tlen -= hdrsize + pad + 8;
- if (unlikely(tlen != qp->s_len)) {
- /* XXX Need to generate an error CQ entry. */
- goto ack_done;
- }
+ if (unlikely(tlen != qp->s_rdma_read_len))
+ goto ack_len_err;
if (!header_in_data)
aeth = be32_to_cpu(ohdr->u.aeth);
else {
aeth = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
}
- ipath_copy_sge(&qp->s_sge, data, tlen);
- if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
- /*
- * Change the state so we contimue
- * processing new requests and wake up the
- * tasklet if there are posted sends.
- */
- qp->s_state = OP(SEND_LAST);
- if (qp->s_tail != qp->s_head)
- tasklet_hi_schedule(&qp->s_task);
- }
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
+ (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
goto ack_done;
}
ack_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
+ goto bail;
+
+ack_op_err:
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto ack_err;
+
+ack_len_err:
+ wc.status = IB_WC_LOC_LEN_ERR;
+ack_err:
+ wc.wr_id = wqe->wr.wr_id;
+ wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+ wc.vendor_err = 0;
+ wc.byte_len = 0;
+ wc.imm_data = 0;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.wc_flags = 0;
+ wc.pkey_index = 0;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ ipath_sqerror_qp(qp, &wc);
bail:
return;
}
@@ -1162,7 +1280,7 @@ bail:
* incoming RC packet for the given QP.
* Called at interrupt level.
* Return 1 if no more processing is needed; otherwise return 0 to
- * schedule a response to be sent and the s_lock unlocked.
+ * schedule a response to be sent.
*/
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
struct ipath_other_headers *ohdr,
@@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
int diff,
int header_in_data)
{
- struct ib_reth *reth;
+ struct ipath_ack_entry *e;
+ u8 i, prev;
+ int old_req;
if (diff > 0) {
/*
* Packet sequence error.
* A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or
- * NAK is pending though.
+ * Don't queue the NAK if we already sent one.
*/
- if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
- qp->r_nak_state != 0)
- goto done;
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- qp->r_ack_state = OP(SEND_ONLY);
+ if (!qp->r_nak_state) {
qp->r_nak_state = IB_NAK_PSN_ERROR;
/* Use the expected PSN. */
qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
}
- goto send_ack;
+ goto done;
}
/*
@@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* can coalesce an outstanding duplicate ACK. We have to
* send the earliest so that RDMA reads can be restarted at
* the requester's expected PSN.
+ *
+ * First, find where this duplicate PSN falls within the
+ * ACKs previously sent.
*/
- if (opcode == OP(RDMA_READ_REQUEST)) {
+ psn &= IPATH_PSN_MASK;
+ e = NULL;
+ old_req = 1;
+ spin_lock_irq(&qp->s_lock);
+ for (i = qp->r_head_ack_queue; ; i = prev) {
+ if (i == qp->s_tail_ack_queue)
+ old_req = 0;
+ if (i)
+ prev = i - 1;
+ else
+ prev = IPATH_MAX_RDMA_ATOMIC;
+ if (prev == qp->r_head_ack_queue) {
+ e = NULL;
+ break;
+ }
+ e = &qp->s_ack_queue[prev];
+ if (!e->opcode) {
+ e = NULL;
+ break;
+ }
+ if (ipath_cmp24(psn, e->psn) >= 0)
+ break;
+ }
+ switch (opcode) {
+ case OP(RDMA_READ_REQUEST): {
+ struct ib_reth *reth;
+ u32 offset;
+ u32 len;
+
+ /*
+ * If we didn't find the RDMA read request in the ack queue,
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
+ */
+ if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
+ goto unlock_done;
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
@@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
data += sizeof(*reth);
}
/*
- * If we receive a duplicate RDMA request, it means the
- * requester saw a sequence error and needs to restart
- * from an earlier point. We can abort the current
- * RDMA read send in that case.
+ * Address range must be a subset of the original
+ * request and start on pmtu boundaries.
+ * We reuse the old ack_queue slot since the requester
+ * should not back up and request an earlier PSN for the
+ * same request.
*/
- spin_lock_irq(&qp->s_lock);
- if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
- (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
- /*
- * We are already sending earlier requested data.
- * Don't abort it to send later out of sequence data.
- */
- spin_unlock_irq(&qp->s_lock);
- goto done;
- }
- qp->s_rdma_len = be32_to_cpu(reth->length);
- if (qp->s_rdma_len != 0) {
+ offset = ((psn - e->psn) & IPATH_PSN_MASK) *
+ ib_mtu_enum_to_int(qp->path_mtu);
+ len = be32_to_cpu(reth->length);
+ if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
+ goto unlock_done;
+ if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
- /*
- * Address range must be a subset of the original
- * request and start on pmtu boundaries.
- */
- ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
- qp->s_rdma_len, vaddr, rkey,
+ ok = ipath_rkey_ok(qp, &e->rdma_sge,
+ len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
- if (unlikely(!ok)) {
- spin_unlock_irq(&qp->s_lock);
- goto done;
- }
+ if (unlikely(!ok))
+ goto unlock_done;
} else {
- qp->s_rdma_sge.sg_list = NULL;
- qp->s_rdma_sge.num_sge = 0;
- qp->s_rdma_sge.sge.mr = NULL;
- qp->s_rdma_sge.sge.vaddr = NULL;
- qp->s_rdma_sge.sge.length = 0;
- qp->s_rdma_sge.sge.sge_length = 0;
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
}
- qp->s_ack_state = opcode;
- qp->s_ack_psn = psn;
- spin_unlock_irq(&qp->s_lock);
- tasklet_hi_schedule(&qp->s_task);
- goto send_ack;
+ e->psn = psn;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
}
- /*
- * A pending RDMA read will ACK anything before it so
- * ignore earlier duplicate requests.
- */
- if (qp->s_ack_state != OP(ACKNOWLEDGE))
- goto done;
-
- /*
- * If an ACK is pending, don't replace the pending ACK
- * with an earlier one since the later one will ACK the earlier.
- * Also, if we already have a pending atomic, send it.
- */
- if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
- (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
- qp->r_ack_state >= OP(COMPARE_SWAP)))
- goto send_ack;
- switch (opcode) {
case OP(COMPARE_SWAP):
- case OP(FETCH_ADD):
+ case OP(FETCH_ADD): {
/*
- * Check for the PSN of the last atomic operation
- * performed and resend the result if found.
+ * If we didn't find the atomic request in the ack queue
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
*/
- if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn)
- goto done;
+ if (!e || e->opcode != (u8) opcode || old_req)
+ goto unlock_done;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ default:
+ if (old_req)
+ goto unlock_done;
+ /*
+ * Resend the most recent ACK if this request is
+ * after all the previous RDMA reads and atomics.
+ */
+ if (i == qp->r_head_ack_queue) {
+ spin_unlock_irq(&qp->s_lock);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->r_psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Resend the RDMA read or atomic op which
+ * ACKs this duplicate request.
+ */
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = i;
break;
}
- qp->r_ack_state = opcode;
qp->r_nak_state = 0;
- qp->r_ack_psn = psn;
-send_ack:
- return 0;
+ spin_unlock_irq(&qp->s_lock);
+ tasklet_hi_schedule(&qp->s_task);
+unlock_done:
+ spin_unlock_irq(&qp->s_lock);
done:
return 1;
+
+send_ack:
+ return 0;
}
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
@@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
nack_inv:
- /*
- * A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or NAK
- * is pending though.
- */
- if (qp->r_ack_state >= OP(COMPARE_SWAP))
- goto send_ack;
ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
- qp->r_ack_state = OP(SEND_ONLY);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
@@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* Don't queue the NAK if a RDMA read or atomic
* is pending though.
*/
- if (qp->r_ack_state >= OP(COMPARE_SWAP))
- goto send_ack;
- qp->r_ack_state = OP(SEND_ONLY);
+ if (qp->r_nak_state)
+ goto done;
qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
@@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto rnr_nak;
goto send_last_imm;
- case OP(RDMA_READ_REQUEST):
+ case OP(RDMA_READ_REQUEST): {
+ struct ipath_ack_entry *e;
+ u32 len;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto nack_acc;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ if (unlikely(next == qp->s_tail_ack_queue))
+ goto nack_inv;
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
@@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
reth = (struct ib_reth *)data;
data += sizeof(*reth);
}
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto nack_acc;
- spin_lock_irq(&qp->s_lock);
- qp->s_rdma_len = be32_to_cpu(reth->length);
- if (qp->s_rdma_len != 0) {
+ len = be32_to_cpu(reth->length);
+ if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
/* Check rkey & NAK */
- ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
- qp->s_rdma_len, vaddr, rkey,
- IB_ACCESS_REMOTE_READ);
- if (unlikely(!ok)) {
- spin_unlock_irq(&qp->s_lock);
+ ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
goto nack_acc;
- }
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (qp->s_rdma_len > pmtu)
- qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
+ if (len > pmtu)
+ qp->r_psn += (len - 1) / pmtu;
} else {
- qp->s_rdma_sge.sg_list = NULL;
- qp->s_rdma_sge.num_sge = 0;
- qp->s_rdma_sge.sge.mr = NULL;
- qp->s_rdma_sge.sge.vaddr = NULL;
- qp->s_rdma_sge.sge.length = 0;
- qp->s_rdma_sge.sge.sge_length = 0;
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
}
+ e->opcode = opcode;
+ e->psn = psn;
/*
* We need to increment the MSN here instead of when we
* finish sending the result since a duplicate request would
* increment it more than once.
*/
qp->r_msn++;
-
- qp->s_ack_state = opcode;
- qp->s_ack_psn = psn;
- spin_unlock_irq(&qp->s_lock);
-
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
+ barrier();
+ qp->r_head_ack_queue = next;
/* Call ipath_do_rc_send() in another thread. */
tasklet_hi_schedule(&qp->s_task);
goto done;
+ }
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
struct ib_atomic_eth *ateth;
+ struct ipath_ack_entry *e;
u64 vaddr;
+ atomic64_t *maddr;
u64 sdata;
u32 rkey;
+ u8 next;
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_acc;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ if (unlikely(next == qp->s_tail_ack_queue))
+ goto nack_inv;
if (!header_in_data)
ateth = &ohdr->u.atomic_eth;
- else {
+ else
ateth = (struct ib_atomic_eth *)data;
- data += sizeof(*ateth);
- }
- vaddr = be64_to_cpu(ateth->vaddr);
+ vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+ be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv;
rkey = be32_to_cpu(ateth->rkey);
@@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc;
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
/* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data);
- spin_lock_irq(&dev->pending_lock);
- qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
- if (opcode == OP(FETCH_ADD))
- *(u64 *) qp->r_sge.sge.vaddr =
- qp->r_atomic_data + sdata;
- else if (qp->r_atomic_data ==
- be64_to_cpu(ateth->compare_data))
- *(u64 *) qp->r_sge.sge.vaddr = sdata;
- spin_unlock_irq(&dev->pending_lock);
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ be64_to_cpu(ateth->compare_data),
+ sdata);
+ e->opcode = opcode;
+ e->psn = psn & IPATH_PSN_MASK;
qp->r_msn++;
- qp->r_atomic_psn = psn & IPATH_PSN_MASK;
- psn |= 1 << 31;
- break;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ barrier();
+ qp->r_head_ack_queue = next;
+
+ /* Call ipath_do_rc_send() in another thread. */
+ tasklet_hi_schedule(&qp->s_task);
+
+ goto done;
}
default:
- /* Drop packet for unknown opcodes. */
- goto done;
+ /* NAK unknown opcodes. */
+ goto nack_inv;
}
qp->r_psn++;
qp->r_state = opcode;
+ qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
- if (psn & (1 << 31)) {
- /*
- * Coalesce ACKs unless there is a RDMA READ or
- * ATOMIC pending.
- */
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- qp->r_ack_state = opcode;
- qp->r_ack_psn = psn;
- }
+ if (psn & (1 << 31))
goto send_ack;
- }
goto done;
nack_acc:
- /*
- * A NAK will ACK earlier sends and RDMA writes.
- * Don't queue the NAK if a RDMA read, atomic, or NAK
- * is pending though.
- */
- if (qp->r_ack_state < OP(COMPARE_SWAP)) {
- ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
- qp->r_ack_state = OP(RDMA_WRITE_ONLY);
- qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
- qp->r_ack_psn = qp->r_psn;
- }
+ ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+
send_ack:
- /* Send ACK right away unless the send tasklet has a pending ACK. */
- if (qp->s_ack_state == OP(ACKNOWLEDGE))
- send_rc_ack(qp);
+ send_rc_ack(qp);
done:
return;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index dffc76016d3..c182bcd6209 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,9 +126,18 @@
#define INFINIPATH_E_RESET 0x0004000000000000ULL
#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+ | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+ | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+ | INFINIPATH_E_REBP )
+
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
- * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID
+ * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
* bit 4: flag buffer, 5: datainfo, 6: header info */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
@@ -143,8 +152,8 @@
/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
-#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
@@ -299,13 +308,6 @@
#define INFINIPATH_XGXS_RX_POL_SHIFT 19
#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
-#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
-
-/* TID entries (memory), HT-only */
-#define INFINIPATH_RT_VALID 0x8000000000000000ULL
-#define INFINIPATH_RT_ADDR_SHIFT 0
-#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
-#define INFINIPATH_RT_BUFSIZE_SHIFT 48
/*
* IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index e86cb171872..d9c2a9b15d8 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wq->tail = tail;
ret = 1;
+ qp->r_wrid_valid = 1;
if (handler) {
u32 n;
@@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
}
spin_unlock_irqrestore(&rq->lock, flags);
- qp->r_wrid_valid = 1;
bail:
return ret;
@@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
unsigned long flags;
struct ib_wc wc;
u64 sdata;
+ atomic64_t *maddr;
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
if (!qp) {
@@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
again:
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
+ qp->s_rnr_timeout) {
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
@@ -310,7 +312,7 @@ again:
sqp->s_rnr_retry--;
dev->n_rnr_naks++;
sqp->s_rnr_timeout =
- ib_ipath_rnr_table[sqp->r_min_rnr_timer];
+ ib_ipath_rnr_table[qp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto done;
}
@@ -343,20 +345,22 @@ again:
wc.sl = sqp->remote_ah_attr.sl;
wc.dlid_path_bits = 0;
wc.port_num = 0;
+ spin_lock_irqsave(&sqp->s_lock, flags);
ipath_sqerror_qp(sqp, &wc);
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
break;
case IB_WR_RDMA_READ:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_READ)))
+ goto acc_err;
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
@@ -364,22 +368,22 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto acc_err;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
- wqe->wr.wr.rdma.remote_addr,
- wqe->wr.wr.rdma.rkey,
+ wqe->wr.wr.atomic.remote_addr,
+ wqe->wr.wr.atomic.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
- sdata = wqe->wr.wr.atomic.swap;
- spin_lock_irqsave(&dev->pending_lock, flags);
- qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
- if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- *(u64 *) qp->r_sge.sge.vaddr =
- qp->r_atomic_data + sdata;
- else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
- *(u64 *) qp->r_sge.sge.vaddr = sdata;
- spin_unlock_irqrestore(&dev->pending_lock, flags);
- *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = wqe->wr.wr.atomic.compare_add;
+ *(u64 *) sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ sdata, wqe->wr.wr.atomic.swap);
goto send_comp;
default:
@@ -440,7 +444,7 @@ again:
send_comp:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
+ if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wqe->wr.wr_id;
wc.status = IB_WC_SUCCESS;
@@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
* We clear the tasklet flag now since we are committing to return
* from the tasklet function.
*/
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ clear_bit(IPATH_S_BUSY, &qp->s_busy);
tasklet_unlock(&qp->s_task);
want_buffer(dev->dd);
dev->n_piowait++;
@@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
wr->sg_list[0].addr & (sizeof(u64) - 1))) {
ret = -EINVAL;
goto bail;
+ } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+ ret = -EINVAL;
+ goto bail;
}
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge) {
@@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data)
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
struct ipath_other_headers *ohdr;
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
+ if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
goto bail;
if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
@@ -683,19 +690,15 @@ again:
*/
spin_lock_irqsave(&qp->s_lock, flags);
- /* Sending responses has higher priority over sending requests. */
- if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE &&
- (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
- bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK;
- else if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
- ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
- ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
+ if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
+ ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
+ ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
/*
* Clear the busy bit before unlocking to avoid races with
* adding new work queue items and then failing to process
* them.
*/
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ clear_bit(IPATH_S_BUSY, &qp->s_busy);
spin_unlock_irqrestore(&qp->s_lock, flags);
goto bail;
}
@@ -728,7 +731,7 @@ again:
goto again;
clear:
- clear_bit(IPATH_S_BUSY, &qp->s_flags);
+ clear_bit(IPATH_S_BUSY, &qp->s_busy);
bail:
return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fc..9307f7187ca 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
* don't access the chip while running diags, or memory diags can
* fail
*/
- if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
ipath_diag_inuse)
/* but re-arm the timer, for diags case; won't hurt other */
goto done;
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
&& time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256];
- ipath_decode_err(ebuf, sizeof ebuf,
+ int iserr;
+ iserr = ipath_decode_err(ebuf, sizeof ebuf,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
- ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+ ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_PKTERRS ))
ipath_dev_err(dd, "Re-enabling masked errors "
"(%s)\n", ebuf);
else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
* them. So only complain about these at debug
* level.
*/
- ipath_dbg("Disabling frequent queue full errors "
- "(%s)\n", ebuf);
+ if (iserr)
+ ipath_dbg("Re-enabling queue full errors (%s)\n",
+ ebuf);
+ else
+ ipath_cdbg(ERRPKT, "Re-enabling packet"
+ " problem interrupt (%s)\n", ebuf);
}
dd->ipath_maskederrs = dd->ipath_ignorederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 325d6634ff5..1c2b03c2ef5 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
{
if (++qp->s_last == qp->s_size)
qp->s_last = 0;
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED)) {
wc->wr_id = wqe->wr.wr_id;
wc->status = IB_WC_SUCCESS;
@@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
send_first:
if (qp->r_reuse_sge) {
qp->r_reuse_sge = 0;
- qp->r_sge = qp->s_rdma_sge;
+ qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
goto done;
}
/* Save the WQE so we can reuse it in case of an error. */
- qp->s_rdma_sge = qp->r_sge;
+ qp->s_rdma_read_sge = qp->r_sge;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto send_last;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 9a3e54664ee..a518f7c8fa8 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
goto bail;
}
+ if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
+ ret = -EPERM;
+ goto bail;
+ }
+
/* IB spec says that num_sge == 0 is OK. */
if (wr->num_sge > qp->s_max_sge) {
ret = -EINVAL;
@@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
done:
/* Queue the completion status entry. */
- if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wr->send_flags & IB_SEND_SIGNALED)) {
wc.wr_id = wr->wr_id;
wc.status = IB_WC_SUCCESS;
@@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
+ qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2aaacdb7e52..18c6df2052c 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
struct ipath_mcast *mcast;
struct ipath_mcast_qp *p;
+ if (lnh != IPATH_LRH_GRH) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
if (mcast == NULL) {
dev->n_pkt_drops++;
@@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
}
dev->n_multicast_rcv++;
list_for_each_entry_rcu(p, &mcast->qp_list, list)
- ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
- tlen, p->qp);
+ ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
/*
* Notify ipath_multicast_detach() if it is waiting for us
* to finish.
@@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
/* +1 is for the qword padding of pbc */
plen = hdrwords + ((len + 3) >> 2) + 1;
if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
- ipath_dbg("packet len 0x%x too long, failing\n", plen);
ret = -EINVAL;
goto bail;
}
@@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev,
props->max_cqe = ib_ipath_max_cqes;
props->max_mr = dev->lk_table.max;
props->max_pd = ib_ipath_max_pds;
- props->max_qp_rd_atom = 1;
- props->max_qp_init_rd_atom = 1;
+ props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
+ props->max_qp_init_rd_atom = 255;
/* props->max_res_rd_atom */
props->max_srq = ib_ipath_max_srqs;
props->max_srq_wr = ib_ipath_max_srq_wrs;
props->max_srq_sge = ib_ipath_max_srq_sges;
/* props->local_ca_ack_delay */
- props->atomic_cap = IB_ATOMIC_HCA;
+ props->atomic_cap = IB_ATOMIC_GLOB;
props->max_pkeys = ipath_get_npkeys(dev->dd);
props->max_mcast_grp = ib_ipath_max_mcast_grps;
props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
@@ -1557,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
dev->dma_device = &dd->pcidev->dev;
- dev->class_dev.dev = dev->dma_device;
dev->query_device = ipath_query_device;
dev->modify_device = ipath_modify_device;
dev->query_port = ipath_query_port;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index c0c8d5b24a7..7c4929f1cb5 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -40,9 +40,12 @@
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
#include "ipath_layer.h"
+#define IPATH_MAX_RDMA_ATOMIC 4
+
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -89,7 +92,7 @@ struct ib_reth {
} __attribute__ ((packed));
struct ib_atomic_eth {
- __be64 vaddr;
+ __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
@@ -108,7 +111,7 @@ struct ipath_other_headers {
} rc;
struct {
__be32 aeth;
- __be64 atomic_ack_eth;
+ __be32 atomic_ack_eth[2];
} at;
__be32 imm_data;
__be32 aeth;
@@ -186,7 +189,7 @@ struct ipath_mmap_info {
struct ipath_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
- struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
};
/*
@@ -312,6 +315,19 @@ struct ipath_sge_state {
};
/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct ipath_ack_entry {
+ u8 opcode;
+ u32 psn;
+ union {
+ struct ipath_sge_state rdma_sge;
+ u64 atomic_data;
+ };
+};
+
+/*
* Variables prefixed with s_ are for the requester (sender).
* Variables prefixed with r_ are for the responder (receiver).
* Variables prefixed with ack_ are for responder replies.
@@ -333,24 +349,24 @@ struct ipath_qp {
struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
struct ipath_sge_state s_sge; /* current send request data */
- /* current RDMA read send data */
- struct ipath_sge_state s_rdma_sge;
+ struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
+ struct ipath_sge_state s_ack_rdma_sge;
+ struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
- unsigned long s_flags;
+ unsigned long s_busy;
u32 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
- u32 s_rdma_len; /* total length of s_rdma_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */
u32 s_psn; /* current packet sequence number */
- u32 s_ack_psn; /* PSN for RDMA_READ */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
- u64 r_atomic_data; /* data for last atomic op */
- u32 r_atomic_psn; /* PSN of last atomic op */
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
@@ -360,12 +376,13 @@ struct ipath_qp {
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
u8 r_state; /* opcode of last packet received */
- u8 r_ack_state; /* opcode of packet to ACK */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
@@ -374,6 +391,10 @@ struct ipath_qp {
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+ u8 s_flags;
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
@@ -390,11 +411,16 @@ struct ipath_qp {
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
+/* Bit definition for s_busy. */
+#define IPATH_S_BUSY 0
+
/*
* Bit definitions for s_flags.
*/
-#define IPATH_S_BUSY 0
-#define IPATH_S_SIGNAL_REQ_WR 1
+#define IPATH_S_SIGNAL_REQ_WR 0x01
+#define IPATH_S_FENCE_PENDING 0x02
+#define IPATH_S_RDMAR_PENDING 0x04
+#define IPATH_S_ACK_PENDING 0x08
#define IPATH_PSN_CREDIT 2048
@@ -706,8 +732,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
int ipath_destroy_srq(struct ib_srq *ibsrq);
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
@@ -757,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
void ipath_do_ruc_send(unsigned long data);
-u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
- u32 pmtu);
-
int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
u32 pmtu, u32 *bth0p, u32 *bth2p);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 0d9b7d06bbc..773145e2994 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1013,14 +1013,14 @@ static struct {
u64 latest_fw;
u32 flags;
} mthca_hca_table[] = {
- [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0),
+ [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0),
.flags = 0 },
- [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600),
+ [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
.flags = MTHCA_FLAG_PCIE },
- [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400),
+ [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0),
.flags = MTHCA_FLAG_MEMFREE |
MTHCA_FLAG_PCIE },
- [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 1, 0),
+ [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
.flags = MTHCA_FLAG_MEMFREE |
MTHCA_FLAG_PCIE |
MTHCA_FLAG_SINAI_OPT }
@@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
goto err_cmd;
if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
- mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
+ mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n",
(int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
(int) (mdev->fw_ver & 0xffff),
(int) (mthca_hca_table[hca_type].latest_fw >> 32),
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 0b9d053a599..48f7c65e9ae 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -175,7 +175,9 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
if (!ret) {
++chunk->npages;
- if (!coherent && chunk->npages == MTHCA_ICM_CHUNK_LEN) {
+ if (coherent)
+ ++chunk->nsg;
+ else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 6037dd3f87d..aa6c70a6a36 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -297,7 +297,8 @@ out:
int mthca_write_mtt_size(struct mthca_dev *dev)
{
- if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+ !(dev->mthca_flags & MTHCA_FLAG_FMR))
/*
* Be friendly to WRITE_MTT command
* and leave two empty slots for the
@@ -310,8 +311,9 @@ int mthca_write_mtt_size(struct mthca_dev *dev)
return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
}
-void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
- int start_index, u64 *buffer_list, int list_len)
+static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
+ struct mthca_mtt *mtt, int start_index,
+ u64 *buffer_list, int list_len)
{
u64 __iomem *mtts;
int i;
@@ -323,8 +325,9 @@ void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
mtts + i);
}
-void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
- int start_index, u64 *buffer_list, int list_len)
+static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
+ struct mthca_mtt *mtt, int start_index,
+ u64 *buffer_list, int list_len)
{
__be64 *mtts;
dma_addr_t dma_handle;
@@ -353,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
int size = mthca_write_mtt_size(dev);
int chunk;
- if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+ !(dev->mthca_flags & MTHCA_FLAG_FMR))
return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
while (list_len > 0) {
@@ -833,6 +837,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
key &= dev->limits.num_mpts - 1;
+ key = adjust_key(dev, key);
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
fmr->maps = 0;
@@ -879,8 +884,8 @@ int mthca_init_mr_table(struct mthca_dev *dev)
}
mpts = mtts = 1 << i;
} else {
- mpts = dev->limits.num_mtt_segs;
- mtts = dev->limits.num_mpts;
+ mtts = dev->limits.num_mtt_segs;
+ mpts = dev->limits.num_mpts;
}
if (!mthca_is_memfree(dev) &&
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0725ad7ad9b..47e6fd46d9c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1293,7 +1293,6 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.dma_device = &dev->pdev->dev;
- dev->ib_dev.class_dev.dev = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
dev->ib_dev.modify_device = mthca_modify_device;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 224c93dd29e..8fe6fee7a97 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
goto out;
}
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
+ }
+
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->limits.pkey_table_len) {
mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
@@ -1083,21 +1088,21 @@ static void mthca_unmap_memfree(struct mthca_dev *dev,
static int mthca_alloc_memfree(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- int ret = 0;
-
if (mthca_is_memfree(dev)) {
qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
qp->qpn, &qp->rq.db);
if (qp->rq.db_index < 0)
- return ret;
+ return -ENOMEM;
qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
qp->qpn, &qp->sq.db);
- if (qp->sq.db_index < 0)
+ if (qp->sq.db_index < 0) {
mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ return -ENOMEM;
+ }
}
- return ret;
+ return 0;
}
static void mthca_free_memfree(struct mthca_dev *dev,
@@ -1414,11 +1419,10 @@ void mthca_free_qp(struct mthca_dev *dev,
* unref the mem-free tables and free the QPN in our table.
*/
if (!qp->ibqp.uobject) {
- mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn,
+ mthca_cq_clean(dev, recv_cq, qp->qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
- if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
- mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
- qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+ if (send_cq != recv_cq)
+ mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
mthca_free_memfree(dev, qp);
mthca_free_wqe_buf(dev, qp);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 2594db2030b..fd558267d1c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -219,7 +219,6 @@ struct ipoib_dev_priv {
union ib_gid local_gid;
u16 local_lid;
- u8 local_rate;
unsigned int admin_mtu;
unsigned int mcast_mtu;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2d483874a58..0c4e59b906c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -65,14 +65,14 @@ struct ipoib_cm_id {
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event);
-static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv,
+static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
u64 mapping[IPOIB_CM_RX_SG])
{
int i;
ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
- for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i)
+ for (i = 0; i < frags; ++i)
ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
}
@@ -90,7 +90,8 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
if (unlikely(ret)) {
ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
- ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping);
+ ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+ priv->cm.srq_ring[id].mapping);
dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
priv->cm.srq_ring[id].skb = NULL;
}
@@ -98,8 +99,8 @@ static int ipoib_cm_post_receive(struct net_device *dev, int id)
return ret;
}
-static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
- u64 mapping[IPOIB_CM_RX_SG])
+static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, int id, int frags,
+ u64 mapping[IPOIB_CM_RX_SG])
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
@@ -107,7 +108,7 @@ static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
if (unlikely(!skb))
- return -ENOMEM;
+ return NULL;
/*
* IPoIB adds a 4 byte header. So we need 12 more bytes to align the
@@ -119,10 +120,10 @@ static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
dev_kfree_skb_any(skb);
- return -EIO;
+ return NULL;
}
- for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) {
+ for (i = 0; i < frags; i++) {
struct page *page = alloc_page(GFP_ATOMIC);
if (!page)
@@ -130,13 +131,13 @@ static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
- 0, PAGE_SIZE, DMA_TO_DEVICE);
+ 0, PAGE_SIZE, DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
goto partial_error;
}
priv->cm.srq_ring[id].skb = skb;
- return 0;
+ return skb;
partial_error:
@@ -145,8 +146,8 @@ partial_error:
for (; i >= 0; --i)
ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
- kfree_skb(skb);
- return -ENOMEM;
+ dev_kfree_skb_any(skb);
+ return NULL;
}
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
@@ -227,7 +228,6 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
struct net_device *dev = cm_id->context;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_cm_rx *p;
- unsigned long flags;
unsigned psn;
int ret;
@@ -256,9 +256,9 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
cm_id->context = p;
p->jiffies = jiffies;
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
list_add(&p->list, &priv->cm.passive_ids);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
queue_delayed_work(ipoib_workqueue,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
return 0;
@@ -276,7 +276,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
{
struct ipoib_cm_rx *p;
struct ipoib_dev_priv *priv;
- unsigned long flags;
int ret;
switch (event->event) {
@@ -289,14 +288,14 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
priv = netdev_priv(p->dev);
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
if (list_empty(&p->list))
ret = 0; /* Connection is going away already. */
else {
list_del_init(&p->list);
ret = -ECONNRESET;
}
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
if (ret) {
ib_destroy_qp(p->qp);
kfree(p);
@@ -309,7 +308,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
}
/* Adjust length of skb with fragments to match received data */
static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
- unsigned int length)
+ unsigned int length, struct sk_buff *toskb)
{
int i, num_frags;
unsigned int size;
@@ -326,7 +325,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
if (length == 0) {
/* don't need this page */
- __free_page(frag->page);
+ skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE);
--skb_shinfo(skb)->nr_frags;
} else {
size = min(length, (unsigned) PAGE_SIZE);
@@ -344,13 +343,14 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
- struct sk_buff *skb;
+ struct sk_buff *skb, *newskb;
struct ipoib_cm_rx *p;
unsigned long flags;
u64 mapping[IPOIB_CM_RX_SG];
+ int frags;
- ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
- wr_id, wc->opcode, wc->status);
+ ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
+ wr_id, wc->status);
if (unlikely(wr_id >= ipoib_recvq_size)) {
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -383,7 +383,11 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
}
- if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) {
+ frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
+ (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+
+ newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, mapping);
+ if (unlikely(!newskb)) {
/*
* If we can't allocate a new RX buffer, dump
* this packet and reuse the old buffer.
@@ -393,16 +397,16 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
goto repost;
}
- ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping);
- memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping);
+ ipoib_cm_dma_unmap_rx(priv, frags, priv->cm.srq_ring[wr_id].mapping);
+ memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
- skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len);
+ skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
dev->last_rx = jiffies;
@@ -446,7 +450,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
skb->len, tx->mtu);
++priv->stats.tx_dropped;
++priv->stats.tx_errors;
- ipoib_cm_skb_too_long(dev, skb, tx->mtu - INFINIBAND_ALEN);
+ ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
return;
}
@@ -498,8 +502,8 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
struct ipoib_tx_buf *tx_req;
unsigned long flags;
- ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n",
- wr_id, wc->opcode, wc->status);
+ ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
+ wr_id, wc->status);
if (unlikely(wr_id >= ipoib_sendq_size)) {
ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
@@ -606,23 +610,22 @@ void ipoib_cm_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_cm_rx *p;
- unsigned long flags;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
return;
ib_destroy_cm_id(priv->cm.id);
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
while (!list_empty(&priv->cm.passive_ids)) {
p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
list_del_init(&p->list);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
ib_destroy_cm_id(p->id);
ib_destroy_qp(p->qp);
kfree(p);
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
}
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
cancel_delayed_work(&priv->cm.stale_task);
}
@@ -636,7 +639,6 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
struct sk_buff *skb;
- unsigned long flags;
p->mtu = be32_to_cpu(data->mtu);
@@ -674,12 +676,12 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
skb_queue_head_init(&skqueue);
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
if (p->neigh)
while ((skb = __skb_dequeue(&p->neigh->queue)))
__skb_queue_tail(&skqueue, skb);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
while ((skb = __skb_dequeue(&skqueue))) {
skb->dev = p->dev;
@@ -889,7 +891,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
- unsigned long flags;
int ret;
switch (event->event) {
@@ -908,7 +909,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
case IB_CM_REJ_RECEIVED:
case IB_CM_TIMEWAIT_EXIT:
ipoib_dbg(priv, "CM error %d.\n", event->event);
- spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
neigh = tx->neigh;
@@ -928,7 +929,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
}
spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ spin_unlock_irq(&priv->tx_lock);
break;
default:
break;
@@ -1017,21 +1018,20 @@ static void ipoib_cm_tx_reap(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.reap_task);
struct ipoib_cm_tx *p;
- unsigned long flags;
- spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
while (!list_empty(&priv->cm.reap_list)) {
p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
list_del(&p->list);
spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ spin_unlock_irq(&priv->tx_lock);
ipoib_cm_tx_destroy(p);
- spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
}
spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ spin_unlock_irq(&priv->tx_lock);
}
static void ipoib_cm_skb_reap(struct work_struct *work)
@@ -1040,15 +1040,14 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
cm.skb_task);
struct net_device *dev = priv->dev;
struct sk_buff *skb;
- unsigned long flags;
unsigned mtu = priv->mcast_mtu;
- spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ spin_unlock_irq(&priv->tx_lock);
if (skb->protocol == htons(ETH_P_IP))
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -1056,11 +1055,11 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
#endif
dev_kfree_skb_any(skb);
- spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock_irq(&priv->tx_lock);
spin_lock(&priv->lock);
}
spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ spin_unlock_irq(&priv->tx_lock);
}
void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
@@ -1082,23 +1081,22 @@ static void ipoib_cm_stale_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.stale_task.work);
struct ipoib_cm_rx *p;
- unsigned long flags;
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
while (!list_empty(&priv->cm.passive_ids)) {
/* List if sorted by LRU, start from tail,
* stop when we see a recently used entry */
p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
- if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
+ if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
break;
list_del_init(&p->list);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
ib_destroy_cm_id(p->id);
ib_destroy_qp(p->qp);
kfree(p);
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
}
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
}
@@ -1138,7 +1136,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
return -EINVAL;
}
-static DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode);
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
int ipoib_cm_add_mode_attr(struct net_device *dev)
{
@@ -1193,7 +1191,8 @@ int ipoib_cm_dev_init(struct net_device *dev)
priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
for (i = 0; i < ipoib_recvq_size; ++i) {
- if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) {
+ if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1,
+ priv->cm.srq_ring[i].mapping)) {
ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
ipoib_cm_dev_cleanup(dev);
return -ENOMEM;
@@ -1228,7 +1227,8 @@ void ipoib_cm_dev_cleanup(struct net_device *dev)
return;
for (i = 0; i < ipoib_recvq_size; ++i)
if (priv->cm.srq_ring[i].skb) {
- ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping);
+ ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+ priv->cm.srq_ring[i].mapping);
dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
priv->cm.srq_ring[i].skb = NULL;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f2aa923ddbe..1bdb9101911 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -172,8 +172,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
struct sk_buff *skb;
u64 addr;
- ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
- wr_id, wc->opcode, wc->status);
+ ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
+ wr_id, wc->status);
if (unlikely(wr_id >= ipoib_recvq_size)) {
ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
@@ -216,7 +216,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
if (wc->slid != priv->local_lid ||
wc->src_qp != priv->qp->qp_num) {
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
- skb->mac.raw = skb->data;
+ skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
dev->last_rx = jiffies;
@@ -245,8 +245,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
struct ipoib_tx_buf *tx_req;
unsigned long flags;
- ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
- wr_id, wc->opcode, wc->status);
+ ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
+ wr_id, wc->status);
if (unlikely(wr_id >= ipoib_sendq_size)) {
ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
@@ -328,9 +328,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
u64 addr;
- if (unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) {
+ if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
- skb->len, priv->mcast_mtu + INFINIBAND_ALEN);
+ skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
++priv->stats.tx_dropped;
++priv->stats.tx_errors;
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 18d27fd352a..b4c380c5a3b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -380,12 +380,12 @@ static void path_rec_completion(int status,
struct net_device *dev = path->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah = NULL;
- struct ipoib_neigh *neigh;
+ struct ipoib_neigh *neigh, *tn;
struct sk_buff_head skqueue;
struct sk_buff *skb;
unsigned long flags;
- if (pathrec)
+ if (!status)
ipoib_dbg(priv, "PathRec LID 0x%04x for GID " IPOIB_GID_FMT "\n",
be16_to_cpu(pathrec->dlid), IPOIB_GID_ARG(pathrec->dgid));
else
@@ -395,14 +395,10 @@ static void path_rec_completion(int status,
skb_queue_head_init(&skqueue);
if (!status) {
- struct ib_ah_attr av = {
- .dlid = be16_to_cpu(pathrec->dlid),
- .sl = pathrec->sl,
- .port_num = priv->port,
- .static_rate = pathrec->rate
- };
-
- ah = ipoib_create_ah(dev, priv->pd, &av);
+ struct ib_ah_attr av;
+
+ if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
+ ah = ipoib_create_ah(dev, priv->pd, &av);
}
spin_lock_irqsave(&priv->lock, flags);
@@ -418,7 +414,7 @@ static void path_rec_completion(int status,
while ((skb = __skb_dequeue(&path->queue)))
__skb_queue_tail(&skqueue, skb);
- list_for_each_entry(neigh, &path->neigh_list, list) {
+ list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
kref_get(&path->ah->ref);
neigh->ah = path->ah;
memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
@@ -814,7 +810,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
queue_work(ipoib_workqueue, &priv->restart_task);
}
-static void ipoib_neigh_destructor(struct neighbour *n)
+static void ipoib_neigh_cleanup(struct neighbour *n)
{
struct ipoib_neigh *neigh;
struct ipoib_dev_priv *priv = netdev_priv(n->dev);
@@ -822,7 +818,7 @@ static void ipoib_neigh_destructor(struct neighbour *n)
struct ipoib_ah *ah = NULL;
ipoib_dbg(priv,
- "neigh_destructor for %06x " IPOIB_GID_FMT "\n",
+ "neigh_cleanup for %06x " IPOIB_GID_FMT "\n",
IPOIB_QPN(n->ha),
IPOIB_GID_RAW_ARG(n->ha + 4));
@@ -874,7 +870,7 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
{
- parms->neigh_destructor = ipoib_neigh_destructor;
+ parms->neigh_cleanup = ipoib_neigh_cleanup;
return 0;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fea737f520f..54fbead4de0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex);
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
+ struct ib_sa_multicast *mc;
struct ipoib_ah *ah;
struct rb_node rb_node;
struct list_head list;
- struct completion done;
-
- int query_id;
- struct ib_sa_query *query;
unsigned long created;
unsigned long backoff;
@@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
-static void
+static int
ipoib_mcast_sendonly_join_complete(int status,
- struct ib_sa_mcmember_rec *mcmember,
- void *mcast_ptr)
+ struct ib_sa_multicast *multicast)
{
- struct ipoib_mcast *mcast = mcast_ptr;
+ struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ /* We trap for port events ourselves. */
+ if (status == -ENETRESET)
+ return 0;
+
if (!status)
- ipoib_mcast_join_finish(mcast, mcmember);
- else {
+ status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+ if (status) {
if (mcast->logcount++ < 20)
ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
IPOIB_GID_FMT ", status %d\n",
@@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int status,
spin_unlock_irq(&priv->tx_lock);
/* Clear the busy flag so we try again */
- clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
- mcast->query = NULL;
+ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
+ &mcast->flags);
}
-
- complete(&mcast->done);
+ return status;
}
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
@@ -359,35 +359,33 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
- init_completion(&mcast->done);
-
- ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
- IB_SA_MCMEMBER_REC_MGID |
- IB_SA_MCMEMBER_REC_PORT_GID |
- IB_SA_MCMEMBER_REC_PKEY |
- IB_SA_MCMEMBER_REC_JOIN_STATE,
- 1000, GFP_ATOMIC,
- ipoib_mcast_sendonly_join_complete,
- mcast, &mcast->query);
- if (ret < 0) {
- ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
+ mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
+ priv->port, &rec,
+ IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY |
+ IB_SA_MCMEMBER_REC_JOIN_STATE,
+ GFP_ATOMIC,
+ ipoib_mcast_sendonly_join_complete,
+ mcast);
+ if (IS_ERR(mcast->mc)) {
+ ret = PTR_ERR(mcast->mc);
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
ret);
} else {
ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
", starting join\n",
IPOIB_GID_ARG(mcast->mcmember.mgid));
-
- mcast->query_id = ret;
}
return ret;
}
-static void ipoib_mcast_join_complete(int status,
- struct ib_sa_mcmember_rec *mcmember,
- void *mcast_ptr)
+static int ipoib_mcast_join_complete(int status,
+ struct ib_sa_multicast *multicast)
{
- struct ipoib_mcast *mcast = mcast_ptr;
+ struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -395,24 +393,29 @@ static void ipoib_mcast_join_complete(int status,
" (status %d)\n",
IPOIB_GID_ARG(mcast->mcmember.mgid), status);
- if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
+ /* We trap for port events ourselves. */
+ if (status == -ENETRESET)
+ return 0;
+
+ if (!status)
+ status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+ if (!status) {
mcast->backoff = 1;
mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
- complete(&mcast->done);
- return;
- }
- if (status == -EINTR) {
- complete(&mcast->done);
- return;
+ if (mcast == priv->broadcast)
+ netif_carrier_on(dev);
+
+ return 0;
}
- if (status && mcast->logcount++ < 20) {
- if (status == -ETIMEDOUT || status == -EINTR) {
+ if (mcast->logcount++ < 20) {
+ if (status == -ETIMEDOUT) {
ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
", status %d\n",
IPOIB_GID_ARG(mcast->mcmember.mgid),
@@ -429,24 +432,18 @@ static void ipoib_mcast_join_complete(int status,
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
- mutex_lock(&mcast_mutex);
+ /* Clear the busy flag so we try again */
+ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ mutex_lock(&mcast_mutex);
spin_lock_irq(&priv->lock);
- mcast->query = NULL;
-
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
- if (status == -ETIMEDOUT)
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
- 0);
- else
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
- mcast->backoff * HZ);
- } else
- complete(&mcast->done);
+ if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+ queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+ mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
- return;
+ return status;
}
static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
@@ -495,15 +492,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
}
- init_completion(&mcast->done);
-
- ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
- &rec, comp_mask, mcast->backoff * 1000,
- GFP_ATOMIC, ipoib_mcast_join_complete,
- mcast, &mcast->query);
-
- if (ret < 0) {
- ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+ &rec, comp_mask, GFP_KERNEL,
+ ipoib_mcast_join_complete, mcast);
+ if (IS_ERR(mcast->mc)) {
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ ret = PTR_ERR(mcast->mc);
+ ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
mcast->backoff *= 2;
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
@@ -515,8 +511,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
&priv->mcast_task,
mcast->backoff * HZ);
mutex_unlock(&mcast_mutex);
- } else
- mcast->query_id = ret;
+ }
}
void ipoib_mcast_join_task(struct work_struct *work)
@@ -536,12 +531,10 @@ void ipoib_mcast_join_task(struct work_struct *work)
{
struct ib_port_attr attr;
- if (!ib_query_port(priv->ca, priv->port, &attr)) {
- priv->local_lid = attr.lid;
- priv->local_rate = attr.active_speed *
- ib_width_enum_to_int(attr.active_width);
- } else
- ipoib_warn(priv, "ib_query_port failed\n");
+ if (!ib_query_port(priv->ca, priv->port, &attr))
+ priv->local_lid = attr.lid;
+ else
+ ipoib_warn(priv, "ib_query_port failed\n");
}
if (!priv->broadcast) {
@@ -568,7 +561,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
}
if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
- ipoib_mcast_join(dev, priv->broadcast, 0);
+ if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+ ipoib_mcast_join(dev, priv->broadcast, 0);
return;
}
@@ -604,7 +598,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
clear_bit(IPOIB_MCAST_RUN, &priv->flags);
- netif_carrier_on(dev);
}
int ipoib_mcast_start_thread(struct net_device *dev)
@@ -625,26 +618,9 @@ int ipoib_mcast_start_thread(struct net_device *dev)
return 0;
}
-static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
- struct ipoib_mcast *mcast)
-{
- spin_lock_irq(&priv->lock);
- if (mcast && mcast->query) {
- ib_sa_cancel_query(mcast->query_id, mcast->query);
- mcast->query = NULL;
- spin_unlock_irq(&priv->lock);
- ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
- IPOIB_GID_ARG(mcast->mcmember.mgid));
- wait_for_completion(&mcast->done);
- }
- else
- spin_unlock_irq(&priv->lock);
-}
-
int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_mcast *mcast;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -660,52 +636,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
if (flush)
flush_workqueue(ipoib_workqueue);
- wait_for_mcast_join(priv, priv->broadcast);
-
- list_for_each_entry(mcast, &priv->multicast_list, list)
- wait_for_mcast_join(priv, mcast);
-
return 0;
}
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_sa_mcmember_rec rec = {
- .join_state = 1
- };
int ret = 0;
- if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))
- return 0;
-
- ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
- IPOIB_GID_ARG(mcast->mcmember.mgid));
+ if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ ib_sa_free_multicast(mcast->mc);
- rec.mgid = mcast->mcmember.mgid;
- rec.port_gid = priv->local_gid;
- rec.pkey = cpu_to_be16(priv->pkey);
-
- /* Remove ourselves from the multicast group */
- ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid);
- if (ret)
- ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+ if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+ ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
+ IPOIB_GID_ARG(mcast->mcmember.mgid));
- /*
- * Just make one shot at leaving and don't wait for a reply;
- * if we fail, too bad.
- */
- ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
- IB_SA_MCMEMBER_REC_MGID |
- IB_SA_MCMEMBER_REC_PORT_GID |
- IB_SA_MCMEMBER_REC_PKEY |
- IB_SA_MCMEMBER_REC_JOIN_STATE,
- 0, GFP_ATOMIC, NULL,
- mcast, &mcast->query);
- if (ret < 0)
- ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
- "for leave (result = %d)\n", ret);
+ /* Remove ourselves from the multicast group */
+ ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
+ &mcast->mcmember.mgid);
+ if (ret)
+ ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+ }
return 0;
}
@@ -758,7 +709,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
dev_kfree_skb_any(skb);
}
- if (mcast->query)
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
ipoib_dbg_mcast(priv, "no address vector, "
"but multicast join already started\n");
else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -916,7 +867,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
/* We have to cancel outside of the spinlock */
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- wait_for_mcast_join(priv, mcast);
ipoib_mcast_leave(mcast->dev, mcast);
ipoib_mcast_free(mcast);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 3cb551b8875..7f3ec205e35 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -259,12 +259,13 @@ void ipoib_event(struct ib_event_handler *handler,
struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler);
- if (record->event == IB_EVENT_PORT_ERR ||
- record->event == IB_EVENT_PKEY_CHANGE ||
- record->event == IB_EVENT_PORT_ACTIVE ||
- record->event == IB_EVENT_LID_CHANGE ||
- record->event == IB_EVENT_SM_CHANGE ||
- record->event == IB_EVENT_CLIENT_REREGISTER) {
+ if ((record->event == IB_EVENT_PORT_ERR ||
+ record->event == IB_EVENT_PKEY_CHANGE ||
+ record->event == IB_EVENT_PORT_ACTIVE ||
+ record->event == IB_EVENT_LID_CHANGE ||
+ record->event == IB_EVENT_SM_CHANGE ||
+ record->event == IB_EVENT_CLIENT_REREGISTER) &&
+ record->element.port_num == priv->port) {
ipoib_dbg(priv, "Port state change event\n");
queue_work(ipoib_workqueue, &priv->flush_task);
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index cae8c96a55f..8960196ffb0 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -245,7 +245,6 @@ struct iser_conn {
wait_queue_head_t wait; /* waitq for conn/disconn */
atomic_t post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
- struct work_struct comperror_work; /* conn term sleepable ctx*/
char name[ISER_OBJECT_NAME_SIZE];
struct iser_page_vec *page_vec; /* represents SG to fmr maps*
* maps serialized as tx is*/
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 89e37283c83..278fcbccc2d 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -658,6 +658,7 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
{
int deferred;
int is_rdma_aligned = 1;
+ struct iser_regd_buf *regd;
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
@@ -672,20 +673,20 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
}
if (iser_ctask->dir[ISER_DIR_IN]) {
- deferred = iser_regd_buff_release
- (&iser_ctask->rdma_regd[ISER_DIR_IN]);
+ regd = &iser_ctask->rdma_regd[ISER_DIR_IN];
+ deferred = iser_regd_buff_release(regd);
if (deferred) {
- iser_err("References remain for BUF-IN rdma reg\n");
- BUG();
+ iser_err("%d references remain for BUF-IN rdma reg\n",
+ atomic_read(&regd->ref_count));
}
}
if (iser_ctask->dir[ISER_DIR_OUT]) {
- deferred = iser_regd_buff_release
- (&iser_ctask->rdma_regd[ISER_DIR_OUT]);
+ regd = &iser_ctask->rdma_regd[ISER_DIR_OUT];
+ deferred = iser_regd_buff_release(regd);
if (deferred) {
- iser_err("References remain for BUF-OUT rdma reg\n");
- BUG();
+ iser_err("%d references remain for BUF-OUT rdma reg\n",
+ atomic_read(&regd->ref_count));
}
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 693b7700289..1fc967464a2 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -48,7 +48,6 @@
static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-static void iser_comp_error_worker(struct work_struct *work);
static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
@@ -480,7 +479,6 @@ int iser_conn_init(struct iser_conn **ibconn)
init_waitqueue_head(&ib_conn->wait);
atomic_set(&ib_conn->post_recv_buf_count, 0);
atomic_set(&ib_conn->post_send_buf_count, 0);
- INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker);
INIT_LIST_HEAD(&ib_conn->conn_list);
spin_lock_init(&ib_conn->lock);
@@ -753,26 +751,6 @@ int iser_post_send(struct iser_desc *tx_desc)
return ret_val;
}
-static void iser_comp_error_worker(struct work_struct *work)
-{
- struct iser_conn *ib_conn =
- container_of(work, struct iser_conn, comperror_work);
-
- /* getting here when the state is UP means that the conn is being *
- * terminated asynchronously from the iSCSI layer's perspective. */
- if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING))
- iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
- ISCSI_ERR_CONN_FAILED);
-
- /* complete the termination process if disconnect event was delivered *
- * note there are no more non completed posts to the QP */
- if (ib_conn->disc_evt_flag) {
- ib_conn->state = ISER_CONN_DOWN;
- wake_up_interruptible(&ib_conn->wait);
- }
-}
-
static void iser_handle_comp_error(struct iser_desc *desc)
{
struct iser_dto *dto = &desc->dto;
@@ -791,8 +769,22 @@ static void iser_handle_comp_error(struct iser_desc *desc)
}
if (atomic_read(&ib_conn->post_recv_buf_count) == 0 &&
- atomic_read(&ib_conn->post_send_buf_count) == 0)
- schedule_work(&ib_conn->comperror_work);
+ atomic_read(&ib_conn->post_send_buf_count) == 0) {
+ /* getting here when the state is UP means that the conn is *
+ * being terminated asynchronously from the iSCSI layer's *
+ * perspective. */
+ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
+ ISER_CONN_TERMINATING))
+ iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+
+ /* complete the termination process if disconnect event was delivered *
+ * note there are no more non completed posts to the QP */
+ if (ib_conn->disc_evt_flag) {
+ ib_conn->state = ISER_CONN_DOWN;
+ wake_up_interruptible(&ib_conn->wait);
+ }
+ }
}
static void iser_cq_tasklet_fn(unsigned long data)