summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/addr.c33
-rw-r--r--drivers/infiniband/core/agent.c3
-rw-r--r--drivers/infiniband/core/cm.c20
-rw-r--r--drivers/infiniband/core/cma.c58
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c22
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c28
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c24
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c4
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_pages.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c3
-rw-r--r--drivers/infiniband/hw/nes/nes.c5
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c6
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c26
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c725
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h38
22 files changed, 682 insertions, 364 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 8aba0ba57de..4ffc224faa7 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -183,20 +183,15 @@ static int addr4_resolve(struct sockaddr_in *src_in,
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
- struct flowi fl;
struct rtable *rt;
struct neighbour *neigh;
int ret;
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = dst_ip;
- fl.nl_u.ip4_u.saddr = src_ip;
- fl.oif = addr->bound_dev_if;
-
- ret = ip_route_output_key(&init_net, &rt, &fl);
- if (ret)
+ rt = ip_route_output(&init_net, dst_ip, src_ip, 0, addr->bound_dev_if);
+ if (IS_ERR(rt)) {
+ ret = PTR_ERR(rt);
goto out;
-
+ }
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = rt->rt_src;
@@ -209,7 +204,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
/* If the device does ARP internally, return 'done' */
if (rt->dst.dev->flags & IFF_NOARP) {
- rdma_copy_addr(addr, rt->dst.dev, NULL);
+ ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
goto put;
}
@@ -236,28 +231,28 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
- struct flowi fl;
+ struct flowi6 fl6;
struct neighbour *neigh;
struct dst_entry *dst;
int ret;
- memset(&fl, 0, sizeof fl);
- ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
- ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
- fl.oif = addr->bound_dev_if;
+ memset(&fl6, 0, sizeof fl6);
+ ipv6_addr_copy(&fl6.daddr, &dst_in->sin6_addr);
+ ipv6_addr_copy(&fl6.saddr, &src_in->sin6_addr);
+ fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(&init_net, NULL, &fl);
+ dst = ip6_route_output(&init_net, NULL, &fl6);
if ((ret = dst->error))
goto put;
- if (ipv6_addr_any(&fl.fl6_src)) {
+ if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
- &fl.fl6_dst, 0, &fl.fl6_src);
+ &fl6.daddr, 0, &fl6.saddr);
if (ret)
goto put;
src_in->sin6_family = AF_INET6;
- ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
+ ipv6_addr_copy(&src_in->sin6_addr, &fl6.saddr);
}
if (dst->dev->flags & IFF_LOOPBACK) {
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 91916a8d5de..2bc7f5af64f 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -101,7 +101,8 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- printk(KERN_ERR SPFX "ib_create_ah_from_wc error\n");
+ printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+ PTR_ERR(ah));
return;
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 64e0903091a..f804e28e1eb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1988,6 +1988,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
goto out;
}
+ if (cm_id->lap_state == IB_CM_LAP_SENT ||
+ cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret) {
cm_enter_timewait(cm_id_priv);
@@ -2129,6 +2133,10 @@ static int cm_dreq_handler(struct cm_work *work)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ break;
case IB_CM_MRA_REP_RCVD:
break;
case IB_CM_TIMEWAIT:
@@ -2349,9 +2357,18 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- case IB_CM_ESTABLISHED:
cm_enter_timewait(cm_id_priv);
break;
+ case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
+ cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+ cm_id_priv->msg);
+ cm_enter_timewait(cm_id_priv);
+ break;
+ }
+ /* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
@@ -2989,6 +3006,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
goto out; /* No match. */
}
atomic_inc(&cur_cm_id_priv->refcount);
+ atomic_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da24fde..5ed9d25d021 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -308,11 +308,13 @@ static inline void release_mc(struct kref *kref)
kfree(mc);
}
-static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+static void cma_release_dev(struct rdma_id_private *id_priv)
{
+ mutex_lock(&lock);
list_del(&id_priv->list);
cma_deref_dev(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ mutex_unlock(&lock);
}
static int cma_set_qkey(struct rdma_id_private *id_priv)
@@ -373,6 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
+ mutex_lock(&lock);
iboe_addr_get_sgid(dev_addr, &iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
@@ -398,6 +401,7 @@ out:
if (!ret)
cma_attach_to_dev(id_priv, cma_dev);
+ mutex_unlock(&lock);
return ret;
}
@@ -904,9 +908,14 @@ void rdma_destroy_id(struct rdma_cm_id *id)
state = cma_exch(id_priv, CMA_DESTROYING);
cma_cancel_operation(id_priv, state);
- mutex_lock(&lock);
+ /*
+ * Wait for any active callback to finish. New callbacks will find
+ * the id_priv state set to destroying and abort.
+ */
+ mutex_lock(&id_priv->handler_mutex);
+ mutex_unlock(&id_priv->handler_mutex);
+
if (id_priv->cma_dev) {
- mutex_unlock(&lock);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
@@ -920,10 +929,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
break;
}
cma_leave_mc_groups(id_priv);
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
+ cma_release_dev(id_priv);
}
- mutex_unlock(&lock);
cma_release_port(id_priv);
cma_deref_id(id_priv);
@@ -1200,9 +1207,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- mutex_lock(&lock);
ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
if (ret)
goto release_conn_id;
@@ -1210,6 +1215,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (!ret) {
/*
@@ -1222,8 +1232,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
goto out;
}
+ cma_deref_id(conn_id);
/* Destroy the CM ID by returning a non-zero value. */
conn_id->cm_id.ib = NULL;
@@ -1394,9 +1406,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
}
- mutex_lock(&lock);
ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1425,17 +1435,25 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
event.param.conn.private_data_len = iw_event->private_data_len;
event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
event.param.conn.responder_resources = attr.max_qp_rd_atom;
+
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, CMA_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
goto out;
}
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
out:
if (dev)
@@ -1951,20 +1969,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex);
-
- /*
- * Grab mutex to block rdma_destroy_id() from removing the device while
- * we're trying to acquire it.
- */
- mutex_lock(&lock);
- if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
- mutex_unlock(&lock);
+ if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
goto out;
- }
if (!status && !id_priv->cma_dev)
status = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
if (status) {
if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
@@ -2265,9 +2274,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err1;
- mutex_lock(&lock);
ret = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
if (ret)
goto err1;
}
@@ -2279,11 +2286,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (id_priv->cma_dev) {
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
- mutex_unlock(&lock);
- }
+ if (id_priv->cma_dev)
+ cma_release_dev(id_priv);
err1:
cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
return ret;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index d02dcc6e596..3216bcad7e8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -338,23 +338,11 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
__be16 peer_port, u8 tos)
{
struct rtable *rt;
- struct flowi fl = {
- .oif = 0,
- .nl_u = {
- .ip4_u = {
- .daddr = peer_ip,
- .saddr = local_ip,
- .tos = tos}
- },
- .proto = IPPROTO_TCP,
- .uli_u = {
- .ports = {
- .sport = local_port,
- .dport = peer_port}
- }
- };
-
- if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0))
+
+ rt = ip_route_output_ports(&init_net, NULL, peer_ip, local_ip,
+ peer_port, local_port, IPPROTO_TCP,
+ tos, 0);
+ if (IS_ERR(rt))
return NULL;
return rt;
}
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8b00e6c46f0..9d8dcfab2b3 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -61,9 +61,9 @@ static char *states[] = {
NULL,
};
-static int dack_mode;
+static int dack_mode = 1;
module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
int c4iw_max_read_depth = 8;
module_param(c4iw_max_read_depth, int, 0644);
@@ -315,23 +315,11 @@ static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip,
__be16 peer_port, u8 tos)
{
struct rtable *rt;
- struct flowi fl = {
- .oif = 0,
- .nl_u = {
- .ip4_u = {
- .daddr = peer_ip,
- .saddr = local_ip,
- .tos = tos}
- },
- .proto = IPPROTO_TCP,
- .uli_u = {
- .ports = {
- .sport = local_port,
- .dport = peer_port}
- }
- };
-
- if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0))
+
+ rt = ip_route_output_ports(&init_net, NULL, peer_ip, local_ip,
+ peer_port, local_port, IPPROTO_TCP,
+ tos, 0);
+ if (IS_ERR(rt))
return NULL;
return rt;
}
@@ -482,6 +470,7 @@ static int send_connect(struct c4iw_ep *ep)
TX_CHAN(ep->tx_chan) |
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
+ ULP_MODE(ULP_MODE_TCPDDP) |
RCV_BUFSIZ(rcv_win>>10);
opt2 = RX_CHANNEL(0) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -1274,6 +1263,7 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
TX_CHAN(ep->tx_chan) |
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
+ ULP_MODE(ULP_MODE_TCPDDP) |
RCV_BUFSIZ(rcv_win>>10);
opt2 = RX_CHANNEL(0) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 54fbc1118ab..e29172c2afc 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -87,17 +87,22 @@ static int dump_qp(int id, void *p, void *data)
return 1;
if (qp->ep)
- cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u "
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "qp sq id %u rq id %u state %u onchip %u "
"ep tid %u state %u %pI4:%u->%pI4:%u\n",
- qp->wq.sq.qid, (int)qp->attr.state,
+ qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP,
qp->ep->hwtid, (int)qp->ep->com.state,
&qp->ep->com.local_addr.sin_addr.s_addr,
ntohs(qp->ep->com.local_addr.sin_port),
&qp->ep->com.remote_addr.sin_addr.s_addr,
ntohs(qp->ep->com.remote_addr.sin_port));
else
- cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u\n",
- qp->wq.sq.qid, (int)qp->attr.state);
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "qp sq id %u rq id %u state %u onchip %u\n",
+ qp->wq.sq.qid, qp->wq.rq.qid,
+ (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP);
if (cc < space)
qpd->pos += cc;
return 0;
@@ -368,7 +373,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
static void c4iw_remove(struct c4iw_dev *dev)
{
PDBG("%s c4iw_dev %p\n", __func__, dev);
- cancel_delayed_work_sync(&dev->db_drop_task);
list_del(&dev->entry);
if (dev->registered)
c4iw_unregister_device(dev);
@@ -523,8 +527,16 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
case CXGB4_STATE_START_RECOVERY:
printk(KERN_INFO MOD "%s: Fatal Error\n",
pci_name(dev->rdev.lldi.pdev));
- if (dev->registered)
+ dev->rdev.flags |= T4_FATAL_ERROR;
+ if (dev->registered) {
+ struct ib_event event;
+
+ memset(&event, 0, sizeof event);
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.device = &dev->ibdev;
+ ib_dispatch_event(&event);
c4iw_unregister_device(dev);
+ }
break;
case CXGB4_STATE_DETACH:
printk(KERN_INFO MOD "%s: Detach\n",
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 2fe19ec9ba6..9f6166f5926 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -176,7 +176,6 @@ struct c4iw_dev {
struct idr mmidr;
spinlock_t lock;
struct list_head entry;
- struct delayed_work db_drop_task;
struct dentry *debugfs_root;
u8 registered;
};
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 4f0be25cab1..70a5a3c646d 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,9 +31,9 @@
*/
#include "iw_cxgb4.h"
-static int ocqp_support;
+static int ocqp_support = 1;
module_param(ocqp_support, int, 0644);
-MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=0)");
+MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
{
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 70004425d69..24af12fc822 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -507,8 +507,14 @@ static inline void t4_swcq_consume(struct t4_cq *cq)
static inline void t4_hwcq_consume(struct t4_cq *cq)
{
cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
- if (++cq->cidx_inc == cq->size)
+ if (++cq->cidx_inc == (cq->size >> 4)) {
+ u32 val;
+
+ val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
+ INGRESSQID(cq->cqid);
+ writel(val, cq->gts);
cq->cidx_inc = 0;
+ }
if (++cq->cidx == cq->size) {
cq->cidx = 0;
cq->gen ^= 1;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b8cb2f145ae..8991677e9a0 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -557,6 +557,7 @@ static ssize_t store_reset(struct device *dev,
dev_info(dev,"Unit %d is disabled, can't reset\n",
dd->ipath_unit);
ret = -EINVAL;
+ goto bail;
}
ret = ipath_reset_device(dd->ipath_unit);
bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index bab9f74c066..cfed5399f07 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -53,8 +53,8 @@ static void __ipath_release_user_pages(struct page **p, size_t num_pages,
}
/* call with current->mm->mmap_sem held */
-static int __get_user_pages(unsigned long start_page, size_t num_pages,
- struct page **p, struct vm_area_struct **vma)
+static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
{
unsigned long lock_limit;
size_t got;
@@ -165,7 +165,7 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
- ret = __get_user_pages(start_page, num_pages, p, NULL);
+ ret = __ipath_get_user_pages(start_page, num_pages, p, NULL);
up_write(&current->mm->mmap_sem);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 8a40cd539ab..f24b79b805f 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1043,6 +1043,9 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
}
}
+ /* We can handle large RDMA requests, so allow larger segments. */
+ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+
mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
if (!mdev) {
dev_err(&pdev->dev, "Device struct alloc failed, "
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 3b4ec3238ce..13de1192927 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -153,7 +153,8 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
nesdev, nesdev->netdev[0]->name);
netdev = nesdev->netdev[0];
nesvnic = netdev_priv(netdev);
- is_bonded = (netdev->master == event_netdev);
+ is_bonded = netif_is_bond_slave(netdev) &&
+ (netdev->master == event_netdev);
if ((netdev == event_netdev) || is_bonded) {
if (nesvnic->rdma_enabled == 0) {
nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since"
@@ -693,7 +694,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
nesdev->netdev_count++;
nesdev->nesadapter->netdev_count++;
- printk(KERN_ERR PFX "%s: NetEffect RNIC driver successfully loaded.\n",
+ printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n",
pci_name(pcidev));
return 0;
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 009ec814d51..ef3291551bc 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1104,21 +1104,19 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
{
struct rtable *rt;
- struct flowi fl;
struct neighbour *neigh;
int rc = arpindex;
struct net_device *netdev;
struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = htonl(dst_ip);
- if (ip_route_output_key(&init_net, &rt, &fl)) {
+ rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0);
+ if (IS_ERR(rt)) {
printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n",
__func__, dst_ip);
return rc;
}
- if (nesvnic->netdev->master)
+ if (netif_is_bond_slave(netdev))
netdev = nesvnic->netdev->master;
else
netdev = nesvnic->netdev;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index b01809a82cb..4a2d21e15a7 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -5582,9 +5582,16 @@ static void qsfp_7322_event(struct work_struct *work)
* even on failure to read cable information. We don't
* get here for QME, so IS_QME check not needed here.
*/
- le2 = (!ret && qd->cache.atten[1] >= qib_long_atten &&
- !ppd->dd->cspec->r1 && QSFP_IS_CU(qd->cache.tech)) ?
- LE2_5m : LE2_DEFAULT;
+ if (!ret && !ppd->dd->cspec->r1) {
+ if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
+ le2 = LE2_QME;
+ else if (qd->cache.atten[1] >= qib_long_atten &&
+ QSFP_IS_CU(qd->cache.tech))
+ le2 = LE2_5m;
+ else
+ le2 = LE2_DEFAULT;
+ } else
+ le2 = LE2_DEFAULT;
ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
init_txdds_table(ppd, 0);
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 5ad224e4a38..8fd3df5bf04 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -464,8 +464,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
memset(smp->data, 0, sizeof(smp->data));
/* Only return the mkey if the protection field allows it. */
- if (smp->method == IB_MGMT_METHOD_SET || ibp->mkey == smp->mkey ||
- ibp->mkeyprot == 0)
+ if (!(smp->method == IB_MGMT_METHOD_GET &&
+ ibp->mkey != smp->mkey &&
+ ibp->mkeyprot == 1))
pip->mkey = ibp->mkey;
pip->gid_prefix = ibp->gid_prefix;
lid = ppd->lid;
@@ -705,7 +706,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
lwe = pip->link_width_enabled;
if (lwe) {
if (lwe == 0xFF)
- lwe = ppd->link_width_supported;
+ set_link_width_enabled(ppd, ppd->link_width_supported);
else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
smp->status |= IB_SMP_INVALID_FIELD;
else if (lwe != ppd->link_width_enabled)
@@ -720,7 +721,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
* speeds.
*/
if (lse == 15)
- lse = ppd->link_speed_supported;
+ set_link_speed_enabled(ppd,
+ ppd->link_speed_supported);
else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
smp->status |= IB_SMP_INVALID_FIELD;
else if (lse != ppd->link_speed_enabled)
@@ -849,7 +851,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
if (clientrereg)
pip->clientrereg_resv_subnetto |= 0x80;
- goto done;
+ goto get_only;
err:
smp->status |= IB_SMP_INVALID_FIELD;
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 19b527bafd5..c109bbdc90a 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -79,6 +79,8 @@
extern const char *const qib_qsfp_devtech[16];
/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
+/* Active Equalization includes fiber, copper full EQ, and copper far Eq */
+#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1)
/* Attenuation should be valid for copper other than full/near Eq */
#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
/* Length is only valid if technology is "copper" */
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index d7a26c1d4f3..7689e49c13c 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -51,8 +51,8 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages,
/*
* Call with current->mm->mmap_sem held.
*/
-static int __get_user_pages(unsigned long start_page, size_t num_pages,
- struct page **p, struct vm_area_struct **vma)
+static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
{
unsigned long lock_limit;
size_t got;
@@ -136,7 +136,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
- ret = __get_user_pages(start_page, num_pages, p, NULL);
+ ret = __qib_get_user_pages(start_page, num_pages, p, NULL);
up_write(&current->mm->mmap_sem);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 7b2fc98e2f2..8db008de539 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -532,6 +532,29 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
stats->custom[3].value = conn->fmr_unalign_cnt;
}
+static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
+ enum iscsi_param param, char *buf)
+{
+ struct iser_conn *ib_conn = ep->dd_data;
+ int len;
+
+ switch (param) {
+ case ISCSI_PARAM_CONN_PORT:
+ case ISCSI_PARAM_CONN_ADDRESS:
+ if (!ib_conn || !ib_conn->cma_id)
+ return -ENOTCONN;
+
+ return iscsi_conn_get_addr_param((struct sockaddr_storage *)
+ &ib_conn->cma_id->route.addr.dst_addr,
+ param, buf);
+ break;
+ default:
+ return -ENOSYS;
+ }
+
+ return len;
+}
+
static struct iscsi_endpoint *
iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
int non_blocking)
@@ -637,6 +660,8 @@ static struct iscsi_transport iscsi_iser_transport = {
ISCSI_MAX_BURST |
ISCSI_PDU_INORDER_EN |
ISCSI_DATASEQ_INORDER_EN |
+ ISCSI_CONN_PORT |
+ ISCSI_CONN_ADDRESS |
ISCSI_EXP_STATSN |
ISCSI_PERSISTENT_PORT |
ISCSI_PERSISTENT_ADDRESS |
@@ -659,6 +684,7 @@ static struct iscsi_transport iscsi_iser_transport = {
.destroy_conn = iscsi_iser_conn_destroy,
.set_param = iscsi_iser_set_param,
.get_conn_param = iscsi_conn_get_param,
+ .get_ep_param = iscsi_iser_get_ep_param,
.get_session_param = iscsi_session_get_param,
.start_conn = iscsi_iser_conn_start,
.stop_conn = iscsi_iser_conn_stop,
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 83664ed2804..376d640487d 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -59,25 +59,31 @@ MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
"v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");
-static int srp_sg_tablesize = SRP_DEF_SG_TABLESIZE;
-static int srp_max_iu_len;
+static unsigned int srp_sg_tablesize;
+static unsigned int cmd_sg_entries;
+static unsigned int indirect_sg_entries;
+static bool allow_ext_sg;
+static int topspin_workarounds = 1;
-module_param(srp_sg_tablesize, int, 0444);
-MODULE_PARM_DESC(srp_sg_tablesize,
- "Max number of gather/scatter entries per I/O (default is 12, max 255)");
+module_param(srp_sg_tablesize, uint, 0444);
+MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
-static int topspin_workarounds = 1;
+module_param(cmd_sg_entries, uint, 0444);
+MODULE_PARM_DESC(cmd_sg_entries,
+ "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
+
+module_param(indirect_sg_entries, uint, 0444);
+MODULE_PARM_DESC(indirect_sg_entries,
+ "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")");
+
+module_param(allow_ext_sg, bool, 0444);
+MODULE_PARM_DESC(allow_ext_sg,
+ "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
"Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
-static int mellanox_workarounds = 1;
-
-module_param(mellanox_workarounds, int, 0444);
-MODULE_PARM_DESC(mellanox_workarounds,
- "Enable workarounds for Mellanox SRP target bugs if != 0");
-
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -114,14 +120,6 @@ static int srp_target_is_topspin(struct srp_target_port *target)
!memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}
-static int srp_target_is_mellanox(struct srp_target_port *target)
-{
- static const u8 mellanox_oui[3] = { 0x00, 0x02, 0xc9 };
-
- return mellanox_workarounds &&
- !memcmp(&target->ioc_guid, mellanox_oui, sizeof mellanox_oui);
-}
-
static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
gfp_t gfp_mask,
enum dma_data_direction direction)
@@ -378,7 +376,7 @@ static int srp_send_req(struct srp_target_port *target)
req->priv.opcode = SRP_LOGIN_REQ;
req->priv.tag = 0;
- req->priv.req_it_iu_len = cpu_to_be32(srp_max_iu_len);
+ req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
SRP_BUF_FORMAT_INDIRECT);
/*
@@ -456,6 +454,24 @@ static bool srp_change_state(struct srp_target_port *target,
return changed;
}
+static void srp_free_req_data(struct srp_target_port *target)
+{
+ struct ib_device *ibdev = target->srp_host->srp_dev->dev;
+ struct srp_request *req;
+ int i;
+
+ for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) {
+ kfree(req->fmr_list);
+ kfree(req->map_page);
+ if (req->indirect_dma_addr) {
+ ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
+ target->indirect_size,
+ DMA_TO_DEVICE);
+ }
+ kfree(req->indirect_desc);
+ }
+}
+
static void srp_remove_work(struct work_struct *work)
{
struct srp_target_port *target =
@@ -472,6 +488,7 @@ static void srp_remove_work(struct work_struct *work)
scsi_remove_host(target->scsi_host);
ib_destroy_cm_id(target->cm_id);
srp_free_target_ib(target);
+ srp_free_req_data(target);
scsi_host_put(target->scsi_host);
}
@@ -535,18 +552,20 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_target_port *target,
struct srp_request *req)
{
+ struct ib_device *ibdev = target->srp_host->srp_dev->dev;
+ struct ib_pool_fmr **pfmr;
+
if (!scsi_sglist(scmnd) ||
(scmnd->sc_data_direction != DMA_TO_DEVICE &&
scmnd->sc_data_direction != DMA_FROM_DEVICE))
return;
- if (req->fmr) {
- ib_fmr_pool_unmap(req->fmr);
- req->fmr = NULL;
- }
+ pfmr = req->fmr_list;
+ while (req->nfmr--)
+ ib_fmr_pool_unmap(*pfmr++);
- ib_dma_unmap_sg(target->srp_host->srp_dev->dev, scsi_sglist(scmnd),
- scsi_sg_count(scmnd), scmnd->sc_data_direction);
+ ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
+ scmnd->sc_data_direction);
}
static void srp_remove_req(struct srp_target_port *target,
@@ -645,96 +664,151 @@ err:
return ret;
}
-static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat,
- int sg_cnt, struct srp_request *req,
- struct srp_direct_buf *buf)
+static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
+ unsigned int dma_len, u32 rkey)
{
- u64 io_addr = 0;
- u64 *dma_pages;
- u32 len;
- int page_cnt;
- int i, j;
- int ret;
- struct srp_device *dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = dev->dev;
- struct scatterlist *sg;
+ struct srp_direct_buf *desc = state->desc;
- if (!dev->fmr_pool)
- return -ENODEV;
+ desc->va = cpu_to_be64(dma_addr);
+ desc->key = cpu_to_be32(rkey);
+ desc->len = cpu_to_be32(dma_len);
- if (srp_target_is_mellanox(target) &&
- (ib_sg_dma_address(ibdev, &scat[0]) & ~dev->fmr_page_mask))
- return -EINVAL;
+ state->total_len += dma_len;
+ state->desc++;
+ state->ndesc++;
+}
- len = page_cnt = 0;
- scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
- unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+static int srp_map_finish_fmr(struct srp_map_state *state,
+ struct srp_target_port *target)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_pool_fmr *fmr;
+ u64 io_addr = 0;
- if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) {
- if (i > 0)
- return -EINVAL;
- else
- ++page_cnt;
- }
- if ((ib_sg_dma_address(ibdev, sg) + dma_len) &
- ~dev->fmr_page_mask) {
- if (i < sg_cnt - 1)
- return -EINVAL;
- else
- ++page_cnt;
- }
+ if (!state->npages)
+ return 0;
- len += dma_len;
+ if (state->npages == 1) {
+ srp_map_desc(state, state->base_dma_addr, state->fmr_len,
+ target->rkey);
+ state->npages = state->fmr_len = 0;
+ return 0;
}
- page_cnt += len >> dev->fmr_page_shift;
- if (page_cnt > SRP_FMR_SIZE)
- return -ENOMEM;
+ fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+ state->npages, io_addr);
+ if (IS_ERR(fmr))
+ return PTR_ERR(fmr);
- dma_pages = kmalloc(sizeof (u64) * page_cnt, GFP_ATOMIC);
- if (!dma_pages)
- return -ENOMEM;
+ *state->next_fmr++ = fmr;
+ state->nfmr++;
- page_cnt = 0;
- scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
- unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+ srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey);
+ state->npages = state->fmr_len = 0;
+ return 0;
+}
- for (j = 0; j < dma_len; j += dev->fmr_page_size)
- dma_pages[page_cnt++] =
- (ib_sg_dma_address(ibdev, sg) &
- dev->fmr_page_mask) + j;
+static void srp_map_update_start(struct srp_map_state *state,
+ struct scatterlist *sg, int sg_index,
+ dma_addr_t dma_addr)
+{
+ state->unmapped_sg = sg;
+ state->unmapped_index = sg_index;
+ state->unmapped_addr = dma_addr;
+}
+
+static int srp_map_sg_entry(struct srp_map_state *state,
+ struct srp_target_port *target,
+ struct scatterlist *sg, int sg_index,
+ int use_fmr)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = dev->dev;
+ dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
+ unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+ unsigned int len;
+ int ret;
+
+ if (!dma_len)
+ return 0;
+
+ if (use_fmr == SRP_MAP_NO_FMR) {
+ /* Once we're in direct map mode for a request, we don't
+ * go back to FMR mode, so no need to update anything
+ * other than the descriptor.
+ */
+ srp_map_desc(state, dma_addr, dma_len, target->rkey);
+ return 0;
}
- req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool,
- dma_pages, page_cnt, io_addr);
- if (IS_ERR(req->fmr)) {
- ret = PTR_ERR(req->fmr);
- req->fmr = NULL;
- goto out;
+ /* If we start at an offset into the FMR page, don't merge into
+ * the current FMR. Finish it out, and use the kernel's MR for this
+ * sg entry. This is to avoid potential bugs on some SRP targets
+ * that were never quite defined, but went away when the initiator
+ * avoided using FMR on such page fragments.
+ */
+ if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) {
+ ret = srp_map_finish_fmr(state, target);
+ if (ret)
+ return ret;
+
+ srp_map_desc(state, dma_addr, dma_len, target->rkey);
+ srp_map_update_start(state, NULL, 0, 0);
+ return 0;
}
- buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, &scat[0]) &
- ~dev->fmr_page_mask);
- buf->key = cpu_to_be32(req->fmr->fmr->rkey);
- buf->len = cpu_to_be32(len);
+ /* If this is the first sg to go into the FMR, save our position.
+ * We need to know the first unmapped entry, its index, and the
+ * first unmapped address within that entry to be able to restart
+ * mapping after an error.
+ */
+ if (!state->unmapped_sg)
+ srp_map_update_start(state, sg, sg_index, dma_addr);
- ret = 0;
+ while (dma_len) {
+ if (state->npages == SRP_FMR_SIZE) {
+ ret = srp_map_finish_fmr(state, target);
+ if (ret)
+ return ret;
-out:
- kfree(dma_pages);
+ srp_map_update_start(state, sg, sg_index, dma_addr);
+ }
+
+ len = min_t(unsigned int, dma_len, dev->fmr_page_size);
+ if (!state->npages)
+ state->base_dma_addr = dma_addr;
+ state->pages[state->npages++] = dma_addr;
+ state->fmr_len += len;
+ dma_addr += len;
+ dma_len -= len;
+ }
+
+ /* If the last entry of the FMR wasn't a full page, then we need to
+ * close it out and start a new one -- we can only merge at page
+ * boundries.
+ */
+ ret = 0;
+ if (len != dev->fmr_page_size) {
+ ret = srp_map_finish_fmr(state, target);
+ if (!ret)
+ srp_map_update_start(state, NULL, 0, 0);
+ }
return ret;
}
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
struct srp_request *req)
{
- struct scatterlist *scat;
+ struct scatterlist *scat, *sg;
struct srp_cmd *cmd = req->cmd->buf;
- int len, nents, count;
- u8 fmt = SRP_DATA_DESC_DIRECT;
+ int i, len, nents, count, use_fmr;
struct srp_device *dev;
struct ib_device *ibdev;
+ struct srp_map_state state;
+ struct srp_indirect_buf *indirect_hdr;
+ u32 table_len;
+ u8 fmt;
if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
return sizeof (struct srp_cmd);
@@ -754,6 +828,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
ibdev = dev->dev;
count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
+ if (unlikely(count == 0))
+ return -EIO;
fmt = SRP_DATA_DESC_DIRECT;
len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
@@ -770,49 +846,99 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
buf->key = cpu_to_be32(target->rkey);
buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
- } else if (srp_map_fmr(target, scat, count, req,
- (void *) cmd->add_data)) {
- /*
- * FMR mapping failed, and the scatterlist has more
- * than one entry. Generate an indirect memory
- * descriptor.
- */
- struct srp_indirect_buf *buf = (void *) cmd->add_data;
- struct scatterlist *sg;
- u32 datalen = 0;
- int i;
-
- fmt = SRP_DATA_DESC_INDIRECT;
- len = sizeof (struct srp_cmd) +
- sizeof (struct srp_indirect_buf) +
- count * sizeof (struct srp_direct_buf);
-
- scsi_for_each_sg(scmnd, sg, count, i) {
- unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
-
- buf->desc_list[i].va =
- cpu_to_be64(ib_sg_dma_address(ibdev, sg));
- buf->desc_list[i].key =
- cpu_to_be32(target->rkey);
- buf->desc_list[i].len = cpu_to_be32(dma_len);
- datalen += dma_len;
+
+ req->nfmr = 0;
+ goto map_complete;
+ }
+
+ /* We have more than one scatter/gather entry, so build our indirect
+ * descriptor table, trying to merge as many entries with FMR as we
+ * can.
+ */
+ indirect_hdr = (void *) cmd->add_data;
+
+ ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
+ target->indirect_size, DMA_TO_DEVICE);
+
+ memset(&state, 0, sizeof(state));
+ state.desc = req->indirect_desc;
+ state.pages = req->map_page;
+ state.next_fmr = req->fmr_list;
+
+ use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
+
+ for_each_sg(scat, sg, count, i) {
+ if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) {
+ /* FMR mapping failed, so backtrack to the first
+ * unmapped entry and continue on without using FMR.
+ */
+ dma_addr_t dma_addr;
+ unsigned int dma_len;
+
+backtrack:
+ sg = state.unmapped_sg;
+ i = state.unmapped_index;
+
+ dma_addr = ib_sg_dma_address(ibdev, sg);
+ dma_len = ib_sg_dma_len(ibdev, sg);
+ dma_len -= (state.unmapped_addr - dma_addr);
+ dma_addr = state.unmapped_addr;
+ use_fmr = SRP_MAP_NO_FMR;
+ srp_map_desc(&state, dma_addr, dma_len, target->rkey);
}
+ }
- if (scmnd->sc_data_direction == DMA_TO_DEVICE)
- cmd->data_out_desc_cnt = count;
- else
- cmd->data_in_desc_cnt = count;
+ if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target))
+ goto backtrack;
- buf->table_desc.va =
- cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf);
- buf->table_desc.key =
- cpu_to_be32(target->rkey);
- buf->table_desc.len =
- cpu_to_be32(count * sizeof (struct srp_direct_buf));
+ /* We've mapped the request, now pull as much of the indirect
+ * descriptor table as we can into the command buffer. If this
+ * target is not using an external indirect table, we are
+ * guaranteed to fit into the command, as the SCSI layer won't
+ * give us more S/G entries than we allow.
+ */
+ req->nfmr = state.nfmr;
+ if (state.ndesc == 1) {
+ /* FMR mapping was able to collapse this to one entry,
+ * so use a direct descriptor.
+ */
+ struct srp_direct_buf *buf = (void *) cmd->add_data;
- buf->len = cpu_to_be32(datalen);
+ *buf = req->indirect_desc[0];
+ goto map_complete;
+ }
+
+ if (unlikely(target->cmd_sg_cnt < state.ndesc &&
+ !target->allow_ext_sg)) {
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Could not fit S/G list into SRP_CMD\n");
+ return -EIO;
}
+ count = min(state.ndesc, target->cmd_sg_cnt);
+ table_len = state.ndesc * sizeof (struct srp_direct_buf);
+
+ fmt = SRP_DATA_DESC_INDIRECT;
+ len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
+ len += count * sizeof (struct srp_direct_buf);
+
+ memcpy(indirect_hdr->desc_list, req->indirect_desc,
+ count * sizeof (struct srp_direct_buf));
+
+ indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
+ indirect_hdr->table_desc.key = cpu_to_be32(target->rkey);
+ indirect_hdr->table_desc.len = cpu_to_be32(table_len);
+ indirect_hdr->len = cpu_to_be32(state.total_len);
+
+ if (scmnd->sc_data_direction == DMA_TO_DEVICE)
+ cmd->data_out_desc_cnt = count;
+ else
+ cmd->data_in_desc_cnt = count;
+
+ ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
+ DMA_TO_DEVICE);
+
+map_complete:
if (scmnd->sc_data_direction == DMA_TO_DEVICE)
cmd->buf_fmt = fmt << 4;
else
@@ -1140,7 +1266,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
spin_unlock_irqrestore(&target->lock, flags);
dev = target->srp_host->srp_dev->dev;
- ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len,
+ ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
DMA_TO_DEVICE);
scmnd->result = 0;
@@ -1164,7 +1290,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
goto err_iu;
}
- ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len,
+ ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
DMA_TO_DEVICE);
if (srp_post_send(target, iu, len)) {
@@ -1204,7 +1330,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
for (i = 0; i < SRP_SQ_SIZE; ++i) {
target->tx_ring[i] = srp_alloc_iu(target->srp_host,
- srp_max_iu_len,
+ target->max_iu_len,
GFP_KERNEL, DMA_TO_DEVICE);
if (!target->tx_ring[i])
goto err;
@@ -1228,6 +1354,78 @@ err:
return -ENOMEM;
}
+static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
+ struct srp_login_rsp *lrsp,
+ struct srp_target_port *target)
+{
+ struct ib_qp_attr *qp_attr = NULL;
+ int attr_mask = 0;
+ int ret;
+ int i;
+
+ if (lrsp->opcode == SRP_LOGIN_RSP) {
+ target->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
+ target->req_lim = be32_to_cpu(lrsp->req_lim_delta);
+
+ /*
+ * Reserve credits for task management so we don't
+ * bounce requests back to the SCSI mid-layer.
+ */
+ target->scsi_host->can_queue
+ = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
+ target->scsi_host->can_queue);
+ } else {
+ shost_printk(KERN_WARNING, target->scsi_host,
+ PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
+ ret = -ECONNRESET;
+ goto error;
+ }
+
+ if (!target->rx_ring[0]) {
+ ret = srp_alloc_iu_bufs(target);
+ if (ret)
+ goto error;
+ }
+
+ ret = -ENOMEM;
+ qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+ if (!qp_attr)
+ goto error;
+
+ qp_attr->qp_state = IB_QPS_RTR;
+ ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+ if (ret)
+ goto error_free;
+
+ ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
+ if (ret)
+ goto error_free;
+
+ for (i = 0; i < SRP_RQ_SIZE; i++) {
+ struct srp_iu *iu = target->rx_ring[i];
+ ret = srp_post_recv(target, iu);
+ if (ret)
+ goto error_free;
+ }
+
+ qp_attr->qp_state = IB_QPS_RTS;
+ ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
+ if (ret)
+ goto error_free;
+
+ ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
+ if (ret)
+ goto error_free;
+
+ ret = ib_send_cm_rtu(cm_id, NULL, 0);
+
+error_free:
+ kfree(qp_attr);
+
+error:
+ target->status = ret;
+}
+
static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event,
struct srp_target_port *target)
@@ -1311,11 +1509,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
struct srp_target_port *target = cm_id->context;
- struct ib_qp_attr *qp_attr = NULL;
- int attr_mask = 0;
int comp = 0;
- int opcode = 0;
- int i;
switch (event->event) {
case IB_CM_REQ_ERROR:
@@ -1327,71 +1521,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
case IB_CM_REP_RECEIVED:
comp = 1;
- opcode = *(u8 *) event->private_data;
-
- if (opcode == SRP_LOGIN_RSP) {
- struct srp_login_rsp *rsp = event->private_data;
-
- target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len);
- target->req_lim = be32_to_cpu(rsp->req_lim_delta);
-
- /*
- * Reserve credits for task management so we don't
- * bounce requests back to the SCSI mid-layer.
- */
- target->scsi_host->can_queue
- = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
- target->scsi_host->can_queue);
- } else {
- shost_printk(KERN_WARNING, target->scsi_host,
- PFX "Unhandled RSP opcode %#x\n", opcode);
- target->status = -ECONNRESET;
- break;
- }
-
- if (!target->rx_ring[0]) {
- target->status = srp_alloc_iu_bufs(target);
- if (target->status)
- break;
- }
-
- qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
- if (!qp_attr) {
- target->status = -ENOMEM;
- break;
- }
-
- qp_attr->qp_state = IB_QPS_RTR;
- target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
- if (target->status)
- break;
-
- target->status = ib_modify_qp(target->qp, qp_attr, attr_mask);
- if (target->status)
- break;
-
- for (i = 0; i < SRP_RQ_SIZE; i++) {
- struct srp_iu *iu = target->rx_ring[i];
- target->status = srp_post_recv(target, iu);
- if (target->status)
- break;
- }
- if (target->status)
- break;
-
- qp_attr->qp_state = IB_QPS_RTS;
- target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
- if (target->status)
- break;
-
- target->status = ib_modify_qp(target->qp, qp_attr, attr_mask);
- if (target->status)
- break;
-
- target->status = ib_send_cm_rtu(cm_id, NULL, 0);
- if (target->status)
- break;
-
+ srp_cm_rep_handler(cm_id, event->private_data, target);
break;
case IB_CM_REJ_RECEIVED:
@@ -1431,8 +1561,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
if (comp)
complete(&target->done);
- kfree(qp_attr);
-
return 0;
}
@@ -1658,6 +1786,22 @@ static ssize_t show_local_ib_device(struct device *dev,
return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
}
+static ssize_t show_cmd_sg_entries(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+ return sprintf(buf, "%u\n", target->cmd_sg_cnt);
+}
+
+static ssize_t show_allow_ext_sg(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+ return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
+}
+
static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
@@ -1668,6 +1812,8 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
+static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
+static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
static struct device_attribute *srp_host_attrs[] = {
&dev_attr_id_ext,
@@ -1680,6 +1826,8 @@ static struct device_attribute *srp_host_attrs[] = {
&dev_attr_zero_req_lim,
&dev_attr_local_ib_port,
&dev_attr_local_ib_device,
+ &dev_attr_cmd_sg_entries,
+ &dev_attr_allow_ext_sg,
NULL
};
@@ -1692,6 +1840,7 @@ static struct scsi_host_template srp_template = {
.eh_abort_handler = srp_abort,
.eh_device_reset_handler = srp_reset_device,
.eh_host_reset_handler = srp_reset_host,
+ .sg_tablesize = SRP_DEF_SG_TABLESIZE,
.can_queue = SRP_CMD_SQ_SIZE,
.this_id = -1,
.cmd_per_lun = SRP_CMD_SQ_SIZE,
@@ -1763,6 +1912,9 @@ enum {
SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
SRP_OPT_IO_CLASS = 1 << 7,
SRP_OPT_INITIATOR_EXT = 1 << 8,
+ SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
+ SRP_OPT_ALLOW_EXT_SG = 1 << 10,
+ SRP_OPT_SG_TABLESIZE = 1 << 11,
SRP_OPT_ALL = (SRP_OPT_ID_EXT |
SRP_OPT_IOC_GUID |
SRP_OPT_DGID |
@@ -1780,6 +1932,9 @@ static const match_table_t srp_opt_tokens = {
{ SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
{ SRP_OPT_IO_CLASS, "io_class=%x" },
{ SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
+ { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
+ { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
+ { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
{ SRP_OPT_ERR, NULL }
};
@@ -1907,6 +2062,31 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
kfree(p);
break;
+ case SRP_OPT_CMD_SG_ENTRIES:
+ if (match_int(args, &token) || token < 1 || token > 255) {
+ printk(KERN_WARNING PFX "bad max cmd_sg_entries parameter '%s'\n", p);
+ goto out;
+ }
+ target->cmd_sg_cnt = token;
+ break;
+
+ case SRP_OPT_ALLOW_EXT_SG:
+ if (match_int(args, &token)) {
+ printk(KERN_WARNING PFX "bad allow_ext_sg parameter '%s'\n", p);
+ goto out;
+ }
+ target->allow_ext_sg = !!token;
+ break;
+
+ case SRP_OPT_SG_TABLESIZE:
+ if (match_int(args, &token) || token < 1 ||
+ token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
+ printk(KERN_WARNING PFX "bad max sg_tablesize parameter '%s'\n", p);
+ goto out;
+ }
+ target->sg_tablesize = token;
+ break;
+
default:
printk(KERN_WARNING PFX "unknown parameter or missing value "
"'%s' in target creation request\n", p);
@@ -1937,39 +2117,73 @@ static ssize_t srp_create_target(struct device *dev,
container_of(dev, struct srp_host, dev);
struct Scsi_Host *target_host;
struct srp_target_port *target;
- int ret;
- int i;
+ struct ib_device *ibdev = host->srp_dev->dev;
+ dma_addr_t dma_addr;
+ int i, ret;
target_host = scsi_host_alloc(&srp_template,
sizeof (struct srp_target_port));
if (!target_host)
return -ENOMEM;
- target_host->transportt = ib_srp_transport_template;
+ target_host->transportt = ib_srp_transport_template;
target_host->max_lun = SRP_MAX_LUN;
target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
target = host_to_target(target_host);
- target->io_class = SRP_REV16A_IB_IO_CLASS;
- target->scsi_host = target_host;
- target->srp_host = host;
- target->lkey = host->srp_dev->mr->lkey;
- target->rkey = host->srp_dev->mr->rkey;
+ target->io_class = SRP_REV16A_IB_IO_CLASS;
+ target->scsi_host = target_host;
+ target->srp_host = host;
+ target->lkey = host->srp_dev->mr->lkey;
+ target->rkey = host->srp_dev->mr->rkey;
+ target->cmd_sg_cnt = cmd_sg_entries;
+ target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
+ target->allow_ext_sg = allow_ext_sg;
+
+ ret = srp_parse_options(buf, target);
+ if (ret)
+ goto err;
+
+ if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
+ target->cmd_sg_cnt < target->sg_tablesize) {
+ printk(KERN_WARNING PFX "No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+ target->sg_tablesize = target->cmd_sg_cnt;
+ }
+
+ target_host->sg_tablesize = target->sg_tablesize;
+ target->indirect_size = target->sg_tablesize *
+ sizeof (struct srp_direct_buf);
+ target->max_iu_len = sizeof (struct srp_cmd) +
+ sizeof (struct srp_indirect_buf) +
+ target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
spin_lock_init(&target->lock);
INIT_LIST_HEAD(&target->free_tx);
INIT_LIST_HEAD(&target->free_reqs);
for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
- target->req_ring[i].index = i;
- list_add_tail(&target->req_ring[i].list, &target->free_reqs);
- }
+ struct srp_request *req = &target->req_ring[i];
- ret = srp_parse_options(buf, target);
- if (ret)
- goto err;
+ req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *),
+ GFP_KERNEL);
+ req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *),
+ GFP_KERNEL);
+ req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
+ if (!req->fmr_list || !req->map_page || !req->indirect_desc)
+ goto err_free_mem;
+
+ dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
+ target->indirect_size,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ibdev, dma_addr))
+ goto err_free_mem;
+
+ req->indirect_dma_addr = dma_addr;
+ req->index = i;
+ list_add_tail(&req->list, &target->free_reqs);
+ }
- ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid);
+ ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
shost_printk(KERN_DEBUG, target->scsi_host, PFX
"new target: id_ext %016llx ioc_guid %016llx pkey %04x "
@@ -1982,11 +2196,11 @@ static ssize_t srp_create_target(struct device *dev,
ret = srp_create_target_ib(target);
if (ret)
- goto err;
+ goto err_free_mem;
ret = srp_new_cm_id(target);
if (ret)
- goto err_free;
+ goto err_free_ib;
target->qp_in_error = 0;
ret = srp_connect_target(target);
@@ -2008,9 +2222,12 @@ err_disconnect:
err_cm_id:
ib_destroy_cm_id(target->cm_id);
-err_free:
+err_free_ib:
srp_free_target_ib(target);
+err_free_mem:
+ srp_free_req_data(target);
+
err:
scsi_host_put(target_host);
@@ -2083,7 +2300,7 @@ static void srp_add_one(struct ib_device *device)
struct ib_device_attr *dev_attr;
struct ib_fmr_pool_param fmr_param;
struct srp_host *host;
- int s, e, p;
+ int max_pages_per_fmr, fmr_page_shift, s, e, p;
dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
if (!dev_attr)
@@ -2101,12 +2318,13 @@ static void srp_add_one(struct ib_device *device)
/*
* Use the smallest page size supported by the HCA, down to a
- * minimum of 512 bytes (which is the smallest sector that a
- * SCSI command will ever carry).
+ * minimum of 4096 bytes. We're unlikely to build large sglists
+ * out of smaller entries.
*/
- srp_dev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1);
- srp_dev->fmr_page_size = 1 << srp_dev->fmr_page_shift;
- srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1);
+ fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
+ srp_dev->fmr_page_size = 1 << fmr_page_shift;
+ srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1);
+ srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE;
INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -2122,17 +2340,24 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->mr))
goto err_pd;
- memset(&fmr_param, 0, sizeof fmr_param);
- fmr_param.pool_size = SRP_FMR_POOL_SIZE;
- fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
- fmr_param.cache = 1;
- fmr_param.max_pages_per_fmr = SRP_FMR_SIZE;
- fmr_param.page_shift = srp_dev->fmr_page_shift;
- fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
-
- srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
+ for (max_pages_per_fmr = SRP_FMR_SIZE;
+ max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
+ max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
+ memset(&fmr_param, 0, sizeof fmr_param);
+ fmr_param.pool_size = SRP_FMR_POOL_SIZE;
+ fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
+ fmr_param.cache = 1;
+ fmr_param.max_pages_per_fmr = max_pages_per_fmr;
+ fmr_param.page_shift = fmr_page_shift;
+ fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+
+ srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
+ if (!IS_ERR(srp_dev->fmr_pool))
+ break;
+ }
+
if (IS_ERR(srp_dev->fmr_pool))
srp_dev->fmr_pool = NULL;
@@ -2207,6 +2432,7 @@ static void srp_remove_one(struct ib_device *device)
srp_disconnect_target(target);
ib_destroy_cm_id(target->cm_id);
srp_free_target_ib(target);
+ srp_free_req_data(target);
scsi_host_put(target->scsi_host);
}
@@ -2230,9 +2456,25 @@ static int __init srp_init_module(void)
BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
- if (srp_sg_tablesize > 255) {
- printk(KERN_WARNING PFX "Clamping srp_sg_tablesize to 255\n");
- srp_sg_tablesize = 255;
+ if (srp_sg_tablesize) {
+ printk(KERN_WARNING PFX "srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
+ if (!cmd_sg_entries)
+ cmd_sg_entries = srp_sg_tablesize;
+ }
+
+ if (!cmd_sg_entries)
+ cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
+
+ if (cmd_sg_entries > 255) {
+ printk(KERN_WARNING PFX "Clamping cmd_sg_entries to 255\n");
+ cmd_sg_entries = 255;
+ }
+
+ if (!indirect_sg_entries)
+ indirect_sg_entries = cmd_sg_entries;
+ else if (indirect_sg_entries < cmd_sg_entries) {
+ printk(KERN_WARNING PFX "Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", cmd_sg_entries);
+ indirect_sg_entries = cmd_sg_entries;
}
ib_srp_transport_template =
@@ -2240,11 +2482,6 @@ static int __init srp_init_module(void)
if (!ib_srp_transport_template)
return -ENOMEM;
- srp_template.sg_tablesize = srp_sg_tablesize;
- srp_max_iu_len = (sizeof (struct srp_cmd) +
- sizeof (struct srp_indirect_buf) +
- srp_sg_tablesize * 16);
-
ret = class_register(&srp_class);
if (ret) {
printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 9dc6fc3fd89..020caf0c378 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -69,9 +69,13 @@ enum {
SRP_TAG_NO_REQ = ~0U,
SRP_TAG_TSK_MGMT = 1U << 31,
- SRP_FMR_SIZE = 256,
+ SRP_FMR_SIZE = 512,
+ SRP_FMR_MIN_SIZE = 128,
SRP_FMR_POOL_SIZE = 1024,
- SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4
+ SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4,
+
+ SRP_MAP_ALLOW_FMR = 0,
+ SRP_MAP_NO_FMR = 1,
};
enum srp_target_state {
@@ -93,9 +97,9 @@ struct srp_device {
struct ib_pd *pd;
struct ib_mr *mr;
struct ib_fmr_pool *fmr_pool;
- int fmr_page_shift;
- int fmr_page_size;
u64 fmr_page_mask;
+ int fmr_page_size;
+ int fmr_max_size;
};
struct srp_host {
@@ -112,7 +116,11 @@ struct srp_request {
struct list_head list;
struct scsi_cmnd *scmnd;
struct srp_iu *cmd;
- struct ib_pool_fmr *fmr;
+ struct ib_pool_fmr **fmr_list;
+ u64 *map_page;
+ struct srp_direct_buf *indirect_desc;
+ dma_addr_t indirect_dma_addr;
+ short nfmr;
short index;
};
@@ -130,6 +138,10 @@ struct srp_target_port {
u32 lkey;
u32 rkey;
enum srp_target_state state;
+ unsigned int max_iu_len;
+ unsigned int cmd_sg_cnt;
+ unsigned int indirect_size;
+ bool allow_ext_sg;
/* Everything above this point is used in the hot path of
* command processing. Try to keep them packed into cachelines.
@@ -144,6 +156,7 @@ struct srp_target_port {
struct Scsi_Host *scsi_host;
char target_name[32];
unsigned int scsi_id;
+ unsigned int sg_tablesize;
struct ib_sa_path_rec path;
__be16 orig_dgid[8];
@@ -179,4 +192,19 @@ struct srp_iu {
enum dma_data_direction direction;
};
+struct srp_map_state {
+ struct ib_pool_fmr **next_fmr;
+ struct srp_direct_buf *desc;
+ u64 *pages;
+ dma_addr_t base_dma_addr;
+ u32 fmr_len;
+ u32 total_len;
+ unsigned int npages;
+ unsigned int nfmr;
+ unsigned int ndesc;
+ struct scatterlist *unmapped_sg;
+ int unmapped_index;
+ dma_addr_t unmapped_addr;
+};
+
#endif /* IB_SRP_H */