summary | refs | log | tree | commit | diff | stats
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_cm.c 3
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c 9
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.c 22
-rw-r--r-- drivers/infiniband/ulp/iser/iscsi_iser.h 73
-rw-r--r-- drivers/infiniband/ulp/iser/iser_initiator.c 148
-rw-r--r-- drivers/infiniband/ulp/iser/iser_memory.c 231
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c 292
-rw-r--r-- drivers/infiniband/ulp/isert/ib_isert.c 747
-rw-r--r-- drivers/infiniband/ulp/isert/ib_isert.h 26
9 files changed, 1148 insertions, 403 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3eceb61e353..7a3175400b2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -817,7 +817,6 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
tx->neigh = NULL;
@@ -1234,7 +1233,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
tx->neigh = NULL;
@@ -1325,7 +1323,6 @@ static void ipoib_cm_tx_start(struct work_struct *work)
neigh = p->neigh;
if (neigh) {
neigh->cm = NULL;
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
}
list_del(&p->list);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c6f71a88c55..82cec1af902 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -493,7 +493,6 @@ static void path_rec_completion(int status,
path,
neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
continue;
}
@@ -618,7 +617,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!ipoib_cm_get(neigh))
ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
ipoib_neigh_free(neigh);
goto err_drop;
}
@@ -639,7 +637,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
neigh->ah = NULL;
if (!path->query && path_rec_start(dev, path))
- goto err_list;
+ goto err_path;
__skb_queue_tail(&neigh->queue, skb);
}
@@ -648,9 +646,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
ipoib_neigh_put(neigh);
return;
-err_list:
- list_del(&neigh->list);
-
err_path:
ipoib_neigh_free(neigh);
err_drop:
@@ -1098,6 +1093,8 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
+ /* remove from parent list */
+ list_del(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
return;
} else {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 2e84ef859c5..dd03cfe596d 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -347,6 +347,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iscsi_iser_conn *iser_conn;
+ struct iscsi_session *session;
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
int error;
@@ -365,7 +366,8 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
}
ib_conn = ep->dd_data;
- if (iser_alloc_rx_descriptors(ib_conn))
+ session = conn->session;
+ if (iser_alloc_rx_descriptors(ib_conn, session))
return -ENOMEM;
/* binds the iSER connection retrieved from the previously
@@ -419,12 +421,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct iscsi_cls_session *cls_session;
struct iscsi_session *session;
struct Scsi_Host *shost;
- struct iser_conn *ib_conn;
+ struct iser_conn *ib_conn = NULL;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
return NULL;
shost->transportt = iscsi_iser_scsi_transport;
+ shost->cmd_per_lun = qdepth;
shost->max_lun = iscsi_max_lun;
shost->max_id = 0;
shost->max_channel = 0;
@@ -441,12 +444,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
ep ? ib_conn->device->ib_device->dma_device : NULL))
goto free_host;
- /*
- * we do not support setting can_queue cmd_per_lun from userspace yet
- * because we preallocate so many resources
- */
+ if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+ iser_info("cmds_max changed from %u to %u\n",
+ cmds_max, ISER_DEF_XMIT_CMDS_MAX);
+ cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+ }
+
cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
- ISCSI_DEF_XMIT_CMDS_MAX, 0,
+ cmds_max, 0,
sizeof(struct iscsi_iser_task),
initial_cmdsn, 0);
if (!cls_session)
@@ -672,6 +677,7 @@ static umode_t iser_attr_is_visible(int param_type, int param)
case ISCSI_PARAM_TGT_RESET_TMO:
case ISCSI_PARAM_IFACE_NAME:
case ISCSI_PARAM_INITIATOR_NAME:
+ case ISCSI_PARAM_DISCOVERY_SESS:
return S_IRUGO;
default:
return 0;
@@ -701,7 +707,7 @@ static struct scsi_host_template iscsi_iser_sht = {
static struct iscsi_transport iscsi_iser_transport = {
.owner = THIS_MODULE,
.name = "iser",
- .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T,
+ .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO,
/* session management */
.create_session = iscsi_iser_session_create,
.destroy_session = iscsi_iser_session_destroy,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 4f069c0d4c0..67914027c61 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -78,14 +78,14 @@
#define iser_warn(fmt, arg...) \
do { \
- if (iser_debug_level > 1) \
+ if (iser_debug_level > 0) \
pr_warn(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
- if (iser_debug_level > 0) \
+ if (iser_debug_level > 1) \
pr_info(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
@@ -102,7 +102,13 @@
/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
-#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX
+#define ISER_DEF_XMIT_CMDS_DEFAULT 512
+#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
+ #define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX
+#else
+ #define ISER_DEF_XMIT_CMDS_MAX ISER_DEF_XMIT_CMDS_DEFAULT
+#endif
+#define ISER_DEF_CMD_PER_LUN ISER_DEF_XMIT_CMDS_MAX
/* QP settings */
/* Maximal bounds on received asynchronous PDUs */
@@ -111,9 +117,9 @@
#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
* SCSI_TMFUNC(2), LOGOUT(1) */
-#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX)
-#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2)
/* the max TX (send) WR supported by the iSER QP is defined by *
* max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -123,7 +129,7 @@
#define ISER_INFLIGHT_DATAOUTS 8
-#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+#define ISER_QP_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \
(1 + ISER_INFLIGHT_DATAOUTS) + \
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
@@ -205,7 +211,7 @@ struct iser_mem_reg {
u64 va;
u64 len;
void *mem_h;
- int is_fmr;
+ int is_mr;
};
struct iser_regd_buf {
@@ -246,6 +252,9 @@ struct iser_rx_desc {
#define ISER_MAX_CQ 4
+struct iser_conn;
+struct iscsi_iser_task;
+
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
@@ -259,6 +268,22 @@ struct iser_device {
int cq_active_qps[ISER_MAX_CQ];
int cqs_used;
struct iser_cq_desc *cq_desc;
+ int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+ unsigned cmds_max);
+ void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+ int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+ void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+};
+
+struct fast_reg_descriptor {
+ struct list_head list;
+ /* For fast registration - FRWR */
+ struct ib_mr *data_mr;
+ struct ib_fast_reg_page_list *data_frpl;
+ /* When false, a local invalidate is posted before the next fast registration */
+ bool valid;
};
struct iser_conn {
@@ -270,13 +295,13 @@ struct iser_conn {
struct iser_device *device; /* device context */
struct rdma_cm_id *cma_id; /* CMA ID */
struct ib_qp *qp; /* QP */
- struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
wait_queue_head_t wait; /* waitq for conn/disconn */
+ unsigned qp_max_recv_dtos; /* num of rx buffers */
+ unsigned qp_max_recv_dtos_mask; /* above minus 1 */
+ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
char name[ISER_OBJECT_NAME_SIZE];
- struct iser_page_vec *page_vec; /* represents SG to fmr maps*
- * maps serialized as tx is*/
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -285,6 +310,17 @@ struct iser_conn {
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+ union {
+ struct {
+ struct ib_fmr_pool *pool; /* pool of IB FMRs */
+ struct iser_page_vec *page_vec; /* represents SG to fmr maps*
+ * maps serialized as tx is*/
+ } fmr;
+ struct {
+ struct list_head pool;
+ int pool_size;
+ } frwr;
+ } fastreg;
};
struct iscsi_iser_conn {
@@ -368,8 +404,10 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
-int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
struct sockaddr_in *src_addr,
@@ -380,7 +418,10 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
-void iser_unreg_mem(struct iser_mem_reg *mem_reg);
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -394,5 +435,9 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_fmr_pool(struct iser_conn *ib_conn);
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_frwr_pool(struct iser_conn *ib_conn);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index b6d81a86c97..538822684d5 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -49,6 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -69,7 +70,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -98,6 +99,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -119,7 +121,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -170,8 +172,78 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
}
}
+static void iser_free_login_buf(struct iser_conn *ib_conn)
+{
+ if (!ib_conn->login_buf)
+ return;
+
+ if (ib_conn->login_req_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ if (ib_conn->login_resp_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_resp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ kfree(ib_conn->login_buf);
+
+ /* make sure we never redo any unmapping */
+ ib_conn->login_req_dma = 0;
+ ib_conn->login_resp_dma = 0;
+ ib_conn->login_buf = NULL;
+}
+
+static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ int req_err, resp_err;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
+
+ ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+ ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!ib_conn->login_buf)
+ goto out_err;
+
+ ib_conn->login_req_buf = ib_conn->login_buf;
+ ib_conn->login_resp_buf = ib_conn->login_buf +
+ ISCSI_DEF_MAX_RECV_SEG_LEN;
+
+ ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_req_buf,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_resp_buf,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ req_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_req_dma);
+ resp_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_resp_dma);
+
+ if (req_err || resp_err) {
+ if (req_err)
+ ib_conn->login_req_dma = 0;
+ if (resp_err)
+ ib_conn->login_resp_dma = 0;
+ goto free_login_buf;
+ }
+ return 0;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+out_err:
+ iser_err("unable to alloc or map login buf\n");
+ return -ENOMEM;
+}
+
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
{
int i, j;
u64 dma_addr;
@@ -179,14 +251,24 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
struct ib_sge *rx_sg;
struct iser_device *device = ib_conn->device;
- ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+ ib_conn->qp_max_recv_dtos = session->cmds_max;
+ ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+ ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+
+ if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
+ goto create_rdma_reg_res_failed;
+
+ if (iser_alloc_login_buf(ib_conn))
+ goto alloc_login_buf_fail;
+
+ ib_conn->rx_descs = kmalloc(session->cmds_max *
sizeof(struct iser_rx_desc), GFP_KERNEL);
if (!ib_conn->rx_descs)
goto rx_desc_alloc_fail;
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) {
dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -207,10 +289,14 @@ rx_desc_dma_map_failed:
rx_desc = ib_conn->rx_descs;
for (j = 0; j < i; j++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
+ iser_free_login_buf(ib_conn);
+alloc_login_buf_fail:
+ device->iser_free_rdma_reg_res(ib_conn);
+create_rdma_reg_res_failed:
iser_err("failed allocating rx descriptors / data buffers\n");
return -ENOMEM;
}
@@ -222,18 +308,27 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
struct iser_device *device = ib_conn->device;
if (!ib_conn->rx_descs)
- return;
+ goto free_login_buf;
+
+ if (device->iser_free_rdma_reg_res)
+ device->iser_free_rdma_reg_res(ib_conn);
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
+ /* make sure we never redo any unmapping */
+ ib_conn->rx_descs = NULL;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
}
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
struct iscsi_iser_conn *iser_conn = conn->dd_data;
+ struct iscsi_session *session = conn->session;
iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
/* check if this is the last login - going to full feature phase */
@@ -248,9 +343,16 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
- iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
+ if (session->discovery_sess) {
+ iser_info("Discovery session, re-using login RX buffer\n");
+ return 0;
+ } else
+ iser_info("Normal session, posting batch of RX %d buffers\n",
+ iser_conn->ib_conn->min_posted_rx);
+
/* Initial post receive buffers */
- if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+ if (iser_post_recvm(iser_conn->ib_conn,
+ iser_conn->ib_conn->min_posted_rx))
return -ENOMEM;
return 0;
@@ -425,6 +527,8 @@ int iser_send_control(struct iscsi_conn *conn,
}
if (task == conn->login_task) {
+ iser_dbg("op %x dsl %lx, posting login rx buffer\n",
+ task->hdr->opcode, data_seg_len);
err = iser_post_recvl(iser_conn->ib_conn);
if (err)
goto send_control_error;
@@ -487,9 +591,9 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
return;
outstanding = ib_conn->post_recv_buf_count;
- if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
- count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
- ISER_MIN_POSTED_RX);
+ if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
+ count = min(ib_conn->qp_max_recv_dtos - outstanding,
+ ib_conn->min_posted_rx);
err = iser_post_recvm(ib_conn, count);
if (err)
iser_err("posting %d rx bufs err %d\n", count, err);
@@ -538,8 +642,8 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
int is_rdma_aligned = 1;
- struct iser_regd_buf *regd;
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
@@ -553,17 +657,11 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
}
- if (iser_task->dir[ISER_DIR_IN]) {
- regd = &iser_task->rdma_regd[ISER_DIR_IN];
- if (regd->reg.is_fmr)
- iser_unreg_mem(&regd->reg);
- }
+ if (iser_task->dir[ISER_DIR_IN])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
- if (iser_task->dir[ISER_DIR_OUT]) {
- regd = &iser_task->rdma_regd[ISER_DIR_OUT];
- if (regd->reg.is_fmr)
- iser_unreg_mem(&regd->reg);
- }
+ if (iser_task->dir[ISER_DIR_OUT])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
/* if the data was unaligned, it was already unmapped and then copied */
if (is_rdma_aligned)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 7827baf455a..1ce0c97d2cc 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -170,8 +170,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
*/
static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct iser_page_vec *page_vec,
- struct ib_device *ibdev)
+ struct ib_device *ibdev, u64 *pages,
+ int *offset, int *data_size)
{
struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
u64 start_addr, end_addr, page, chunk_start = 0;
@@ -180,7 +180,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
- page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
+ *offset = (u64) sgl[0].offset & ~MASK_4K;
new_chunk = 1;
cur_page = 0;
@@ -204,13 +204,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
which might be unaligned */
page = chunk_start & MASK_4K;
do {
- page_vec->pages[cur_page++] = page;
+ pages[cur_page++] = page;
page += SIZE_4K;
} while (page < end_addr);
}
- page_vec->data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
+ *data_size = total_sz;
+ iser_dbg("page_vec->data_size:%d cur_page %d\n",
+ *data_size, cur_page);
return cur_page;
}
@@ -267,11 +268,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
struct scatterlist *sg;
int i;
- if (iser_debug_level == 0)
- return;
-
for_each_sg(sgl, sg, data->dma_nents, i)
- iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
+ iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
sg_page(sg), sg->offset,
@@ -298,8 +296,10 @@ static void iser_page_vec_build(struct iser_data_buf *data,
page_vec->offset = 0;
iser_dbg("Translating sg sz: %d\n", data->dma_nents);
- page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev);
- iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len);
+ page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
+ &page_vec->offset,
+ &page_vec->data_size);
+ iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
page_vec->length = page_vec_len;
@@ -347,16 +347,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
}
}
+static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
+ struct ib_device *ibdev,
+ enum iser_data_dir cmd_dir,
+ int aligned_len)
+{
+ struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+
+ iscsi_conn->fmr_unalign_cnt++;
+ iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
+ aligned_len, mem->size);
+
+ if (iser_debug_level > 0)
+ iser_data_buf_dump(mem, ibdev);
+
+ /* unmap the command data before accessing it */
+ iser_dma_unmap_task_data(iser_task);
+
+ /* allocate copy buf, if we are writing, copy the */
+ /* unaligned scatterlist, dma map the copy */
+ if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
/**
- * iser_reg_rdma_mem - Registers memory intended for RDMA,
- * obtaining rkey and va
+ * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
+ * using FMR (if possible) obtaining rkey and va
*
* returns 0 on success, errno code on failure
*/
-int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
- struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
@@ -370,20 +395,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
regd_buf = &iser_task->rdma_regd[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
- if (aligned_len != mem->dma_nents ||
- (!ib_conn->fmr_pool && mem->dma_nents > 1)) {
- iscsi_conn->fmr_unalign_cnt++;
- iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
- aligned_len, mem->size);
- iser_data_buf_dump(mem, ibdev);
-
- /* unmap the command data before accessing it */
- iser_dma_unmap_task_data(iser_task);
-
- /* allocate copy buf, if we are writing, copy the */
- /* unaligned scatterlist, dma map the copy */
- if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
- return -ENOMEM;
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
mem = &iser_task->data_copy[cmd_dir];
}
@@ -395,7 +413,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
- regd_buf->reg.is_fmr = 0;
+ regd_buf->reg.is_mr = 0;
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
@@ -404,22 +422,159 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
(unsigned long)regd_buf->reg.va,
(unsigned long)regd_buf->reg.len);
} else { /* use FMR for multiple dma entries */
- iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
- err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
+ iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
+ err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+ &regd_buf->reg);
if (err && err != -EAGAIN) {
iser_data_buf_dump(mem, ibdev);
iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
mem->dma_nents,
ntoh24(iser_task->desc.iscsi_header.dlength));
iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
- ib_conn->page_vec->data_size, ib_conn->page_vec->length,
- ib_conn->page_vec->offset);
- for (i=0 ; i<ib_conn->page_vec->length ; i++)
+ ib_conn->fastreg.fmr.page_vec->data_size,
+ ib_conn->fastreg.fmr.page_vec->length,
+ ib_conn->fastreg.fmr.page_vec->offset);
+ for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long) ib_conn->page_vec->pages[i]);
+ (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
}
if (err)
return err;
}
return 0;
}
+
+static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
+ struct iser_conn *ib_conn,
+ struct iser_regd_buf *regd_buf,
+ u32 offset, unsigned int data_size,
+ unsigned int page_list_len)
+{
+ struct ib_send_wr fastreg_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ u8 key;
+ int ret;
+
+ if (!desc->valid) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED;
+ inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(desc->data_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(desc->data_mr, ++key);
+ }
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
+ fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
+ fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
+ fastreg_wr.wr.fast_reg.length = data_size;
+ fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+ fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+
+ if (!wr) {
+ wr = &fastreg_wr;
+ atomic_inc(&ib_conn->post_send_buf_count);
+ } else {
+ wr->next = &fastreg_wr;
+ atomic_add(2, &ib_conn->post_send_buf_count);
+ }
+
+ ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
+ if (ret) {
+ if (bad_wr->next)
+ atomic_sub(2, &ib_conn->post_send_buf_count);
+ else
+ atomic_dec(&ib_conn->post_send_buf_count);
+ iser_err("fast registration failed, ret:%d\n", ret);
+ return ret;
+ }
+ desc->valid = false;
+
+ regd_buf->reg.mem_h = desc;
+ regd_buf->reg.lkey = desc->data_mr->lkey;
+ regd_buf->reg.rkey = desc->data_mr->rkey;
+ regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
+ regd_buf->reg.len = data_size;
+ regd_buf->reg.is_mr = 1;
+
+ return ret;
+}
+
+/**
+ * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * using Fast Registration WR (if possible) obtaining rkey and va
+ *
+ * returns 0 on success, errno code on failure
+ */
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
+ struct ib_device *ibdev = device->ib_device;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+ struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+ struct fast_reg_descriptor *desc;
+ unsigned int data_size, page_list_len;
+ int err, aligned_len;
+ unsigned long flags;
+ u32 offset;
+
+ aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
+ mem = &iser_task->data_copy[cmd_dir];
+ }
+
+ /* if there is a single dma entry, dma mr suffices */
+ if (mem->dma_nents == 1) {
+ struct scatterlist *sg = (struct scatterlist *)mem->buf;
+
+ regd_buf->reg.lkey = device->mr->lkey;
+ regd_buf->reg.rkey = device->mr->rkey;
+ regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
+ regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
+ regd_buf->reg.is_mr = 0;
+ } else {
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+ struct fast_reg_descriptor, list);
+ list_del(&desc->list);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
+ desc->data_frpl->page_list,
+ &offset, &data_size);
+
+ if (page_list_len * SIZE_4K < data_size) {
+ iser_err("fast reg page_list too short to hold this SG\n");
+ err = -EINVAL;
+ goto err_reg;
+ }
+
+ err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
+ offset, data_size, page_list_len);
+ if (err)
+ goto err_reg;
+ }
+
+ return 0;
+err_reg:
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ return err;
+}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2c4941d0656..afe95674008 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -73,6 +73,36 @@ static int iser_create_device_ib_res(struct iser_device *device)
{
int i, j;
struct iser_cq_desc *cq_desc;
+ struct ib_device_attr *dev_attr;
+
+ dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
+ if (!dev_attr)
+ return -ENOMEM;
+
+ if (ib_query_device(device->ib_device, dev_attr)) {
+ pr_warn("Query device failed for %s\n", device->ib_device->name);
+ goto dev_attr_err;
+ }
+
+ /* Assign function handles - based on FMR support */
+ if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
+ device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ iser_info("FMR supported, using FMR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
+ device->iser_free_rdma_reg_res = iser_free_fmr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
+ } else
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ iser_info("FRWR supported, using FRWR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_frwr_pool;
+ device->iser_free_rdma_reg_res = iser_free_frwr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_frwr;
+ } else {
+ iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n");
+ goto dev_attr_err;
+ }
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
@@ -128,6 +158,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
+ kfree(dev_attr);
return 0;
handler_err:
@@ -147,6 +178,8 @@ pd_err:
kfree(device->cq_desc);
cq_desc_err:
iser_err("failed to allocate an IB resource\n");
+dev_attr_err:
+ kfree(dev_attr);
return -1;
}
@@ -178,56 +211,23 @@ static void iser_free_device_ib_res(struct iser_device *device)
}
/**
- * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
+ * iser_create_fmr_pool - Creates FMR pool and page_vector
*
- * returns 0 on success, -1 on failure
+ * returns 0 on success, or errno code on failure
*/
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
- struct iser_device *device;
- struct ib_qp_init_attr init_attr;
- int req_err, resp_err, ret = -ENOMEM;
+ struct iser_device *device = ib_conn->device;
struct ib_fmr_pool_param params;
- int index, min_index = 0;
-
- BUG_ON(ib_conn->device == NULL);
-
- device = ib_conn->device;
-
- ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!ib_conn->login_buf)
- goto out_err;
-
- ib_conn->login_req_buf = ib_conn->login_buf;
- ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
-
- ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- ib_conn->login_req_dma = 0;
- if (resp_err)
- ib_conn->login_resp_dma = 0;
- goto out_err;
- }
+ int ret = -ENOMEM;
- ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
- (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
- GFP_KERNEL);
- if (!ib_conn->page_vec)
- goto out_err;
+ ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) +
+ (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
+ GFP_KERNEL);
+ if (!ib_conn->fastreg.fmr.page_vec)
+ return ret;
- ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
+ ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1);
params.page_shift = SHIFT_4K;
/* when the first/last SG element are not start/end *
@@ -235,24 +235,143 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
/* make the pool size twice the max number of SCSI commands *
* the ML is expected to queue, watermark for unmap at 50% */
- params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
- params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
+ params.pool_size = cmds_max * 2;
+ params.dirty_watermark = cmds_max;
params.cache = 0;
params.flush_function = NULL;
params.access = (IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
- ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
- ret = PTR_ERR(ib_conn->fmr_pool);
- if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) {
- ib_conn->fmr_pool = NULL;
- goto out_err;
- } else if (ret == -ENOSYS) {
- ib_conn->fmr_pool = NULL;
+ ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, &params);
+ if (!IS_ERR(ib_conn->fastreg.fmr.pool))
+ return 0;
+
+ /* no FMR => no need for page_vec */
+ kfree(ib_conn->fastreg.fmr.page_vec);
+ ib_conn->fastreg.fmr.page_vec = NULL;
+
+ ret = PTR_ERR(ib_conn->fastreg.fmr.pool);
+ ib_conn->fastreg.fmr.pool = NULL;
+ if (ret != -ENOSYS) {
+ iser_err("FMR allocation failed, err %d\n", ret);
+ return ret;
+ } else {
iser_warn("FMRs are not supported, using unaligned mode\n");
- ret = 0;
+ return 0;
}
+}
+
+/**
+ * iser_free_fmr_pool - releases the FMR pool and page vec
+ * @ib_conn: connection whose fastreg.fmr resources are released
+ *
+ * Destroys the FMR pool when the pool pointer is set, frees the page
+ * vector, and resets both pointers to NULL.
+ */
+void iser_free_fmr_pool(struct iser_conn *ib_conn)
+{
+	iser_info("freeing conn %p fmr pool %p\n",
+		  ib_conn, ib_conn->fastreg.fmr.pool);
+
+	/* the pool pointer may be NULL, in which case there is nothing to destroy */
+	if (ib_conn->fastreg.fmr.pool)
+		ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool);
+	ib_conn->fastreg.fmr.pool = NULL;
+
+	/* kfree() accepts NULL, so no guard is needed for the page vector */
+	kfree(ib_conn->fastreg.fmr.page_vec);
+	ib_conn->fastreg.fmr.page_vec = NULL;
+}
+
+/**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * @ib_conn:  connection that owns the pool (ib_conn->fastreg.frwr.pool)
+ * @cmds_max: number of descriptors to allocate
+ *
+ * Every descriptor receives a fast-reg page list and a fast-reg MR, each
+ * sized for ISCSI_ISER_SG_TABLESIZE + 1 pages. When an allocation fails,
+ * the partially built descriptor is released here and the descriptors
+ * already queued are released through iser_free_frwr_pool().
+ *
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+{
+	struct iser_device *device = ib_conn->device;
+	struct fast_reg_descriptor *desc;
+	int i, ret;
+
+	INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+	ib_conn->fastreg.frwr.pool_size = 0;
+
+	for (i = 0; i < cmds_max; i++) {
+		desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc) {
+			iser_err("Failed to allocate a new fast_reg descriptor\n");
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
+							      ISCSI_ISER_SG_TABLESIZE + 1);
+		if (IS_ERR(desc->data_frpl)) {
+			ret = PTR_ERR(desc->data_frpl);
+			iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret);
+			/* descriptor is not on the pool list yet: free it here */
+			kfree(desc);
+			goto err;
+		}
+
+		desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+						     ISCSI_ISER_SG_TABLESIZE + 1);
+		if (IS_ERR(desc->data_mr)) {
+			ret = PTR_ERR(desc->data_mr);
+			iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
+			/* undo the page list, then the descriptor itself */
+			ib_free_fast_reg_page_list(desc->data_frpl);
+			kfree(desc);
+			goto err;
+		}
+
+		desc->valid = true;
+		list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+		ib_conn->fastreg.frwr.pool_size++;
+	}
+
+	return 0;
+
+err:
+	/* release every descriptor that was queued before the failure */
+	iser_free_frwr_pool(ib_conn);
+	return ret;
+}
+
+/**
+ * iser_free_frwr_pool - releases the pool of fast_reg descriptors
+ * @ib_conn: connection whose fastreg.frwr.pool list is drained
+ *
+ * Unlinks every descriptor found on the pool list and frees its page
+ * list, its MR and the descriptor itself. Returns immediately when the
+ * list is empty. Warns when fewer descriptors were found on the list
+ * than fastreg.frwr.pool_size records.
+ */
+void iser_free_frwr_pool(struct iser_conn *ib_conn)
+{
+	struct fast_reg_descriptor *cur, *next;
+	int freed = 0;
+
+	if (list_empty(&ib_conn->fastreg.frwr.pool))
+		return;
+
+	iser_info("freeing conn %p frwr pool\n", ib_conn);
+
+	/* _safe variant: each entry is unlinked and freed while iterating */
+	list_for_each_entry_safe(cur, next, &ib_conn->fastreg.frwr.pool, list) {
+		list_del(&cur->list);
+		ib_free_fast_reg_page_list(cur->data_frpl);
+		ib_dereg_mr(cur->data_mr);
+		kfree(cur);
+		freed++;
+	}
+
+	if (freed < ib_conn->fastreg.frwr.pool_size)
+		iser_warn("pool still has %d regions registered\n",
+			  ib_conn->fastreg.frwr.pool_size - freed);
+}
+
+/**
+ * iser_create_ib_conn_res - Creates the Queue-Pair (QP)
+ *
+ * returns 0 on success, -1 on failure
+ */
+static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ struct ib_qp_init_attr init_attr;
+ int ret = -ENOMEM;
+ int index, min_index = 0;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
memset(&init_attr, 0, sizeof init_attr);
@@ -282,9 +401,9 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
goto out_err;
ib_conn->qp = ib_conn->cma_id->qp;
- iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
+ iser_info("setting conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->cma_id->qp);
+ ib_conn->cma_id->qp);
return ret;
out_err:
@@ -293,7 +412,7 @@ out_err:
}
/**
- * releases the FMR pool and QP objects, returns 0 on success,
+ * releases the QP objects, returns 0 on success,
* -1 on failure
*/
static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
@@ -301,13 +420,11 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
int cq_index;
BUG_ON(ib_conn == NULL);
- iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n",
+ iser_info("freeing conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->qp);
+ ib_conn->qp);
/* qp is created only once both addr & route are resolved */
- if (ib_conn->fmr_pool != NULL)
- ib_destroy_fmr_pool(ib_conn->fmr_pool);
if (ib_conn->qp != NULL) {
cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
@@ -316,21 +433,7 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
rdma_destroy_qp(ib_conn->cma_id);
}
- ib_conn->fmr_pool = NULL;
ib_conn->qp = NULL;
- kfree(ib_conn->page_vec);
-
- if (ib_conn->login_buf) {
- if (ib_conn->login_req_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (ib_conn->login_resp_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->login_buf);
- }
return 0;
}
@@ -694,7 +797,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
page_list = page_vec->pages;
io_addr = page_list[0];
- mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
+ mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool,
page_list,
page_vec->length,
io_addr);
@@ -709,7 +812,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
- mem_reg->is_fmr = 1;
+ mem_reg->is_mr = 1;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
@@ -727,12 +830,18 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
}
/**
- * Unregister (previosuly registered) memory.
+ * Unregister (previously registered using FMR) memory.
+ * If memory is non-FMR does nothing.
*/
-void iser_unreg_mem(struct iser_mem_reg *reg)
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
+ if (!reg->is_mr)
+ return;
+
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
@@ -742,6 +851,23 @@ void iser_unreg_mem(struct iser_mem_reg *reg)
reg->mem_h = NULL;
}
+/**
+ * iser_unreg_mem_frwr - hand a task's fast_reg descriptor back to the pool
+ * @iser_task: task whose registration for @cmd_dir is dropped
+ * @cmd_dir:   data direction, used to index iser_task->rdma_regd[]
+ *
+ * Does nothing when the region is not marked as an MR. Otherwise clears
+ * the registration handle and the is_mr flag, then appends the descriptor
+ * to the connection's FRWR pool while holding ib_conn->lock.
+ */
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+			 enum iser_data_dir cmd_dir)
+{
+	struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+	struct fast_reg_descriptor *desc;
+
+	if (!reg->is_mr)
+		return;
+
+	/* detach the descriptor from the region before re-queueing it */
+	desc = reg->mem_h;
+	reg->mem_h = NULL;
+	reg->is_mr = 0;
+
+	spin_lock_bh(&ib_conn->lock);
+	list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+	spin_unlock_bh(&ib_conn->lock);
+}
+
int iser_post_recvl(struct iser_conn *ib_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -779,7 +905,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
- my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
+ my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
}
rx_wr--;
@@ -863,7 +989,11 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
if (wc.status == IB_WC_SUCCESS) {
if (wc.opcode == IB_WC_SEND)
iser_snd_completion(tx_desc, ib_conn);
- else
+ else if (wc.opcode == IB_WC_LOCAL_INV ||
+ wc.opcode == IB_WC_FAST_REG_MR) {
+ atomic_dec(&ib_conn->post_send_buf_count);
+ continue;
+ } else
iser_err("expected opcode %d got %d\n",
IB_WC_SEND, wc.opcode);
} else {
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 3f62041222f..3591855cc5b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -1,7 +1,7 @@
/*******************************************************************************
* This file contains iSCSI extentions for RDMA (iSER) Verbs
*
- * (c) Copyright 2013 RisingTide Systems LLC.
+ * (c) Copyright 2013 Datera, Inc.
*
* Nicholas A. Bellinger <nab@linux-iscsi.org>
*
@@ -39,7 +39,17 @@ static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(device_list);
static struct workqueue_struct *isert_rx_wq;
static struct workqueue_struct *isert_comp_wq;
-static struct kmem_cache *isert_cmd_cache;
+
+static void
+isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
+static int
+isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr);
+static void
+isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
+static int
+isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr);
static void
isert_qp_event_callback(struct ib_event *e, void *context)
@@ -80,14 +90,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
{
struct isert_device *device = isert_conn->conn_device;
struct ib_qp_init_attr attr;
- struct ib_device_attr devattr;
int ret, index, min_index = 0;
- memset(&devattr, 0, sizeof(struct ib_device_attr));
- ret = isert_query_device(cma_id->device, &devattr);
- if (ret)
- return ret;
-
mutex_lock(&device_list_mutex);
for (index = 0; index < device->cqs_used; index++)
if (device->cq_active_qps[index] <
@@ -108,7 +112,7 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
* FIXME: Use devattr.max_sge - 2 for max_send_sge as
* work-around for RDMA_READ..
*/
- attr.cap.max_send_sge = devattr.max_sge - 2;
+ attr.cap.max_send_sge = device->dev_attr.max_sge - 2;
isert_conn->max_sge = attr.cap.max_send_sge;
attr.cap.max_recv_sge = 1;
@@ -210,14 +214,31 @@ isert_create_device_ib_res(struct isert_device *device)
{
struct ib_device *ib_dev = device->ib_device;
struct isert_cq_desc *cq_desc;
+ struct ib_device_attr *dev_attr;
int ret = 0, i, j;
+ dev_attr = &device->dev_attr;
+ ret = isert_query_device(ib_dev, dev_attr);
+ if (ret)
+ return ret;
+
+ /* assign function handlers */
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ device->use_frwr = 1;
+ device->reg_rdma_mem = isert_reg_rdma_frwr;
+ device->unreg_rdma_mem = isert_unreg_rdma_frwr;
+ } else {
+ device->use_frwr = 0;
+ device->reg_rdma_mem = isert_map_rdma;
+ device->unreg_rdma_mem = isert_unmap_cmd;
+ }
+
device->cqs_used = min_t(int, num_online_cpus(),
device->ib_device->num_comp_vectors);
device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used);
- pr_debug("Using %d CQs, device %s supports %d vectors\n",
+ pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n",
device->cqs_used, device->ib_device->name,
- device->ib_device->num_comp_vectors);
+ device->ib_device->num_comp_vectors, device->use_frwr);
device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *
device->cqs_used, GFP_KERNEL);
if (!device->cq_desc) {
@@ -363,6 +384,85 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
return device;
}
+/**
+ * isert_conn_free_frwr_pool - release a connection's fast_reg descriptors
+ * @isert_conn: connection whose conn_frwr_pool list is drained
+ *
+ * Unlinks every descriptor on the pool list and frees its page list, its
+ * MR and the descriptor itself. Returns immediately when the list is
+ * empty. Warns when fewer descriptors were found on the list than
+ * conn_frwr_pool_size records.
+ */
+static void
+isert_conn_free_frwr_pool(struct isert_conn *isert_conn)
+{
+	struct fast_reg_descriptor *fr_desc, *tmp;
+	int i = 0;
+
+	if (list_empty(&isert_conn->conn_frwr_pool))
+		return;
+
+	/* terminate the message with a newline so the log line is complete */
+	pr_debug("Freeing conn %p frwr pool\n", isert_conn);
+
+	/* _safe variant: each entry is unlinked and freed while iterating */
+	list_for_each_entry_safe(fr_desc, tmp,
+				 &isert_conn->conn_frwr_pool, list) {
+		list_del(&fr_desc->list);
+		ib_free_fast_reg_page_list(fr_desc->data_frpl);
+		ib_dereg_mr(fr_desc->data_mr);
+		kfree(fr_desc);
+		++i;
+	}
+
+	if (i < isert_conn->conn_frwr_pool_size)
+		pr_warn("Pool still has %d regions registered\n",
+			isert_conn->conn_frwr_pool_size - i);
+}
+
+/**
+ * isert_conn_create_frwr_pool - build a connection's fast_reg descriptor pool
+ * @isert_conn: connection that owns the pool (conn_frwr_pool)
+ *
+ * Allocates ISCSI_DEF_XMIT_CMDS_MAX descriptors, each with a fast-reg page
+ * list and a fast-reg MR sized for ISCSI_ISER_SG_TABLESIZE pages, and
+ * queues them on conn_frwr_pool.
+ *
+ * On failure the descriptor being built is released here, because it is
+ * not yet on the pool list and isert_conn_free_frwr_pool() would not see
+ * it. Descriptors already queued are released through
+ * isert_conn_free_frwr_pool().
+ *
+ * Returns 0 on success, or a negative errno code on failure.
+ */
+static int
+isert_conn_create_frwr_pool(struct isert_conn *isert_conn)
+{
+	struct fast_reg_descriptor *fr_desc;
+	struct isert_device *device = isert_conn->conn_device;
+	int i, ret;
+
+	INIT_LIST_HEAD(&isert_conn->conn_frwr_pool);
+	isert_conn->conn_frwr_pool_size = 0;
+	for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) {
+		fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
+		if (!fr_desc) {
+			pr_err("Failed to allocate fast_reg descriptor\n");
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		fr_desc->data_frpl =
+			ib_alloc_fast_reg_page_list(device->ib_device,
+						    ISCSI_ISER_SG_TABLESIZE);
+		if (IS_ERR(fr_desc->data_frpl)) {
+			pr_err("Failed to allocate fr_pg_list err=%ld\n",
+			       PTR_ERR(fr_desc->data_frpl));
+			ret = PTR_ERR(fr_desc->data_frpl);
+			/* not on the pool list yet: free it to avoid a leak */
+			kfree(fr_desc);
+			goto err;
+		}
+
+		fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd,
+							ISCSI_ISER_SG_TABLESIZE);
+		if (IS_ERR(fr_desc->data_mr)) {
+			pr_err("Failed to allocate frmr err=%ld\n",
+			       PTR_ERR(fr_desc->data_mr));
+			ret = PTR_ERR(fr_desc->data_mr);
+			ib_free_fast_reg_page_list(fr_desc->data_frpl);
+			/* not on the pool list yet: free it to avoid a leak */
+			kfree(fr_desc);
+			goto err;
+		}
+		pr_debug("Create fr_desc %p page_list %p\n",
+			 fr_desc, fr_desc->data_frpl->page_list);
+
+		fr_desc->valid = true;
+		list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
+		isert_conn->conn_frwr_pool_size++;
+	}
+
+	/* terminate the message with a newline so the log line is complete */
+	pr_debug("Creating conn %p frwr pool size=%d\n",
+		 isert_conn, isert_conn->conn_frwr_pool_size);
+
+	return 0;
+
+err:
+	/* release every descriptor that was queued before the failure */
+	isert_conn_free_frwr_pool(isert_conn);
+	return ret;
+}
+
static int
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
@@ -389,6 +489,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
kref_init(&isert_conn->conn_kref);
kref_get(&isert_conn->conn_kref);
mutex_init(&isert_conn->conn_mutex);
+ spin_lock_init(&isert_conn->conn_lock);
cma_id->context = isert_conn;
isert_conn->conn_cm_id = cma_id;
@@ -446,6 +547,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
isert_conn->conn_pd = device->dev_pd;
isert_conn->conn_mr = device->dev_mr;
+ if (device->use_frwr) {
+ ret = isert_conn_create_frwr_pool(isert_conn);
+ if (ret) {
+ pr_err("Conn: %p failed to create frwr_pool\n", isert_conn);
+ goto out_frwr;
+ }
+ }
+
ret = isert_conn_setup_qp(isert_conn, cma_id);
if (ret)
goto out_conn_dev;
@@ -459,6 +568,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
return 0;
out_conn_dev:
+ if (device->use_frwr)
+ isert_conn_free_frwr_pool(isert_conn);
+out_frwr:
isert_device_try_release(device);
out_rsp_dma_map:
ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
@@ -482,6 +594,9 @@ isert_connect_release(struct isert_conn *isert_conn)
pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
+ if (device->use_frwr)
+ isert_conn_free_frwr_pool(isert_conn);
+
if (isert_conn->conn_qp) {
cq_index = ((struct isert_cq_desc *)
isert_conn->conn_qp->recv_cq->cq_context)->cq_index;
@@ -869,46 +984,37 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,
size, rx_buflen, MAX_KEY_VALUE_PAIRS);
memcpy(login->req_buf, &rx_desc->data[0], size);
- complete(&isert_conn->conn_login_comp);
-}
-
-static void
-isert_release_cmd(struct iscsi_cmd *cmd)
-{
- struct isert_cmd *isert_cmd = container_of(cmd, struct isert_cmd,
- iscsi_cmd);
-
- pr_debug("Entering isert_release_cmd %p >>>>>>>>>>>>>>>.\n", isert_cmd);
-
- kfree(cmd->buf_ptr);
- kfree(cmd->tmr_req);
-
- kmem_cache_free(isert_cmd_cache, isert_cmd);
+ if (login->first_request) {
+ complete(&isert_conn->conn_login_comp);
+ return;
+ }
+ schedule_delayed_work(&conn->login_work, 0);
}
static struct iscsi_cmd
-*isert_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp)
+*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp)
{
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct isert_cmd *isert_cmd;
+ struct iscsi_cmd *cmd;
- isert_cmd = kmem_cache_zalloc(isert_cmd_cache, gfp);
- if (!isert_cmd) {
- pr_err("Unable to allocate isert_cmd\n");
+ cmd = iscsit_allocate_cmd(conn, gfp);
+ if (!cmd) {
+ pr_err("Unable to allocate iscsi_cmd + isert_cmd\n");
return NULL;
}
+ isert_cmd = iscsit_priv_cmd(cmd);
isert_cmd->conn = isert_conn;
- isert_cmd->iscsi_cmd.release_cmd = &isert_release_cmd;
+ isert_cmd->iscsi_cmd = cmd;
- return &isert_cmd->iscsi_cmd;
+ return cmd;
}
static int
isert_handle_scsi_cmd(struct isert_conn *isert_conn,
- struct isert_cmd *isert_cmd, struct iser_rx_desc *rx_desc,
- unsigned char *buf)
+ struct isert_cmd *isert_cmd, struct iscsi_cmd *cmd,
+ struct iser_rx_desc *rx_desc, unsigned char *buf)
{
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
struct iscsi_conn *conn = isert_conn->conn;
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf;
struct scatterlist *sg;
@@ -1015,9 +1121,9 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
static int
isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct iser_rx_desc *rx_desc, unsigned char *buf)
+ struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc,
+ unsigned char *buf)
{
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
struct iscsi_conn *conn = isert_conn->conn;
struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf;
int rc;
@@ -1034,9 +1140,9 @@ isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
static int
isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
- struct iser_rx_desc *rx_desc, struct iscsi_text *hdr)
+ struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc,
+ struct iscsi_text *hdr)
{
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
struct iscsi_conn *conn = isert_conn->conn;
u32 payload_length = ntoh24(hdr->dlength);
int rc;
@@ -1081,26 +1187,26 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
switch (opcode) {
case ISCSI_OP_SCSI_CMD:
- cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn, GFP_KERNEL);
if (!cmd)
break;
- isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd);
+ isert_cmd = iscsit_priv_cmd(cmd);
isert_cmd->read_stag = read_stag;
isert_cmd->read_va = read_va;
isert_cmd->write_stag = write_stag;
isert_cmd->write_va = write_va;
- ret = isert_handle_scsi_cmd(isert_conn, isert_cmd,
+ ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
rx_desc, (unsigned char *)hdr);
break;
case ISCSI_OP_NOOP_OUT:
- cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn, GFP_KERNEL);
if (!cmd)
break;
- isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd);
- ret = isert_handle_nop_out(isert_conn, isert_cmd,
+ isert_cmd = iscsit_priv_cmd(cmd);
+ ret = isert_handle_nop_out(isert_conn, isert_cmd, cmd,
rx_desc, (unsigned char *)hdr);
break;
case ISCSI_OP_SCSI_DATA_OUT:
@@ -1108,7 +1214,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_SCSI_TMFUNC:
- cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn, GFP_KERNEL);
if (!cmd)
break;
@@ -1116,7 +1222,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_LOGOUT:
- cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn, GFP_KERNEL);
if (!cmd)
break;
@@ -1127,12 +1233,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
HZ);
break;
case ISCSI_OP_TEXT:
- cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn, GFP_KERNEL);
if (!cmd)
break;
- isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd);
- ret = isert_handle_text_cmd(isert_conn, isert_cmd,
+ isert_cmd = iscsit_priv_cmd(cmd);
+ ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd,
rx_desc, (struct iscsi_text *)hdr);
break;
default:
@@ -1243,26 +1349,65 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
- pr_debug("isert_unmap_cmd >>>>>>>>>>>>>>>>>>>>>>>\n");
+ pr_debug("isert_unmap_cmd: %p\n", isert_cmd);
+ if (wr->sge) {
+ pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd);
+ ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ wr->sge = NULL;
+ }
+
+ if (wr->send_wr) {
+ pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd);
+ kfree(wr->send_wr);
+ wr->send_wr = NULL;
+ }
+
+ if (wr->ib_sge) {
+ pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd);
+ kfree(wr->ib_sge);
+ wr->ib_sge = NULL;
+ }
+}
+
+static void
+isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
+{
+ struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ LIST_HEAD(unmap_list);
+
+ pr_debug("unreg_frwr_cmd: %p\n", isert_cmd);
+
+ if (wr->fr_desc) {
+ pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n",
+ isert_cmd, wr->fr_desc);
+ spin_lock_bh(&isert_conn->conn_lock);
+ list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool);
+ spin_unlock_bh(&isert_conn->conn_lock);
+ wr->fr_desc = NULL;
+ }
if (wr->sge) {
- ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE);
+ pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd);
+ ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
wr->sge = NULL;
}
- kfree(wr->send_wr);
+ wr->ib_sge = NULL;
wr->send_wr = NULL;
-
- kfree(isert_cmd->ib_sge);
- isert_cmd->ib_sge = NULL;
}
static void
isert_put_cmd(struct isert_cmd *isert_cmd)
{
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
struct iscsi_conn *conn = isert_conn->conn;
+ struct isert_device *device = isert_conn->conn_device;
pr_debug("Entering isert_put_cmd: %p\n", isert_cmd);
@@ -1276,7 +1421,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)
if (cmd->data_direction == DMA_TO_DEVICE)
iscsit_stop_dataout_timer(cmd);
- isert_unmap_cmd(isert_cmd, isert_conn);
+ device->unreg_rdma_mem(isert_cmd, isert_conn);
transport_generic_free_cmd(&cmd->se_cmd, 0);
break;
case ISCSI_OP_SCSI_TMFUNC:
@@ -1311,7 +1456,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)
* Fall-through
*/
default:
- isert_release_cmd(cmd);
+ iscsit_release_cmd(cmd);
break;
}
}
@@ -1347,27 +1492,16 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
struct isert_cmd *isert_cmd)
{
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct se_cmd *se_cmd = &cmd->se_cmd;
- struct ib_device *ib_dev = isert_cmd->conn->conn_cm_id->device;
+ struct isert_conn *isert_conn = isert_cmd->conn;
+ struct isert_device *device = isert_conn->conn_device;
iscsit_stop_dataout_timer(cmd);
+ device->unreg_rdma_mem(isert_cmd, isert_conn);
+ cmd->write_data_done = wr->cur_rdma_length;
- if (wr->sge) {
- pr_debug("isert_do_rdma_read_comp: Unmapping wr->sge from t_data_sg\n");
- ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE);
- wr->sge = NULL;
- }
-
- if (isert_cmd->ib_sge) {
- pr_debug("isert_do_rdma_read_comp: Freeing isert_cmd->ib_sge\n");
- kfree(isert_cmd->ib_sge);
- isert_cmd->ib_sge = NULL;
- }
-
- cmd->write_data_done = se_cmd->data_length;
-
- pr_debug("isert_do_rdma_read_comp, calling target_execute_cmd\n");
+ pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
spin_lock_bh(&cmd->istate_lock);
cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
@@ -1383,7 +1517,7 @@ isert_do_control_comp(struct work_struct *work)
struct isert_cmd, comp_work);
struct isert_conn *isert_conn = isert_cmd->conn;
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
switch (cmd->i_state) {
case ISTATE_SEND_TASKMGTRSP:
@@ -1429,7 +1563,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
struct isert_conn *isert_conn,
struct ib_device *ib_dev)
{
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||
cmd->i_state == ISTATE_SEND_LOGOUTRSP ||
@@ -1621,8 +1755,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
static int
isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
{
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *)
@@ -1671,8 +1804,7 @@ static int
isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
bool nopout_response)
{
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
@@ -1691,8 +1823,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
static int
isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
@@ -1710,8 +1841,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
static int
isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
@@ -1729,8 +1859,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
static int
isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
@@ -1762,8 +1891,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
static int
isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
{
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
struct iscsi_text_rsp *hdr =
@@ -1805,7 +1933,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
u32 data_left, u32 offset)
{
- struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct scatterlist *sg_start, *tmp_sg;
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
u32 sg_off, page_off;
@@ -1832,8 +1960,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
ib_sge->lkey = isert_conn->conn_mr->lkey;
- pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u\n",
- ib_sge->addr, ib_sge->length);
+ pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n",
+ ib_sge->addr, ib_sge->length, ib_sge->lkey);
page_off = 0;
data_left -= ib_sge->length;
ib_sge++;
@@ -1847,200 +1975,373 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
}
static int
-isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr)
{
struct se_cmd *se_cmd = &cmd->se_cmd;
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
- struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct ib_send_wr *wr_failed, *send_wr;
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_send_wr *send_wr;
struct ib_sge *ib_sge;
- struct scatterlist *sg;
- u32 offset = 0, data_len, data_left, rdma_write_max;
- int rc, ret = 0, count, sg_nents, i, ib_sge_cnt;
-
- pr_debug("RDMA_WRITE: data_length: %u\n", se_cmd->data_length);
+ struct scatterlist *sg_start;
+ u32 sg_off = 0, sg_nents;
+ u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0;
+ int ret = 0, count, i, ib_sge_cnt;
+
+ if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+ data_left = se_cmd->data_length;
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ cmd->stat_sn = conn->stat_sn++;
+ } else {
+ sg_off = cmd->write_data_done / PAGE_SIZE;
+ data_left = se_cmd->data_length - cmd->write_data_done;
+ offset = cmd->write_data_done;
+ isert_cmd->tx_desc.isert_cmd = isert_cmd;
+ }
- sg = &se_cmd->t_data_sg[0];
- sg_nents = se_cmd->t_data_nents;
+ sg_start = &cmd->se_cmd.t_data_sg[sg_off];
+ sg_nents = se_cmd->t_data_nents - sg_off;
- count = ib_dma_map_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE);
+ count = ib_dma_map_sg(ib_dev, sg_start, sg_nents,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (unlikely(!count)) {
- pr_err("Unable to map put_datain SGs\n");
+ pr_err("Cmd: %p unrable to map SGs\n", isert_cmd);
return -EINVAL;
}
- wr->sge = sg;
+ wr->sge = sg_start;
wr->num_sge = sg_nents;
- pr_debug("Mapped IB count: %u sg: %p sg_nents: %u for RDMA_WRITE\n",
- count, sg, sg_nents);
+ wr->cur_rdma_length = data_left;
+ pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+ isert_cmd, count, sg_start, sg_nents, data_left);
ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL);
if (!ib_sge) {
- pr_warn("Unable to allocate datain ib_sge\n");
+ pr_warn("Unable to allocate ib_sge\n");
ret = -ENOMEM;
goto unmap_sg;
}
- isert_cmd->ib_sge = ib_sge;
-
- pr_debug("Allocated ib_sge: %p from t_data_ents: %d for RDMA_WRITE\n",
- ib_sge, se_cmd->t_data_nents);
+ wr->ib_sge = ib_sge;
wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge);
wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
GFP_KERNEL);
if (!wr->send_wr) {
- pr_err("Unable to allocate wr->send_wr\n");
+ pr_debug("Unable to allocate wr->send_wr\n");
ret = -ENOMEM;
goto unmap_sg;
}
- pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n",
- wr->send_wr, wr->send_wr_num);
-
- iscsit_increment_maxcmdsn(cmd, conn->sess);
- cmd->stat_sn = conn->stat_sn++;
wr->isert_cmd = isert_cmd;
rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
- data_left = se_cmd->data_length;
for (i = 0; i < wr->send_wr_num; i++) {
send_wr = &isert_cmd->rdma_wr.send_wr[i];
data_len = min(data_left, rdma_write_max);
- send_wr->opcode = IB_WR_RDMA_WRITE;
send_wr->send_flags = 0;
- send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset;
- send_wr->wr.rdma.rkey = isert_cmd->read_stag;
+ if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+ send_wr->opcode = IB_WR_RDMA_WRITE;
+ send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset;
+ send_wr->wr.rdma.rkey = isert_cmd->read_stag;
+ if (i + 1 == wr->send_wr_num)
+ send_wr->next = &isert_cmd->tx_desc.send_wr;
+ else
+ send_wr->next = &wr->send_wr[i + 1];
+ } else {
+ send_wr->opcode = IB_WR_RDMA_READ;
+ send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset;
+ send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+ if (i + 1 == wr->send_wr_num)
+ send_wr->send_flags = IB_SEND_SIGNALED;
+ else
+ send_wr->next = &wr->send_wr[i + 1];
+ }
ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
send_wr, data_len, offset);
ib_sge += ib_sge_cnt;
- if (i + 1 == wr->send_wr_num)
- send_wr->next = &isert_cmd->tx_desc.send_wr;
- else
- send_wr->next = &wr->send_wr[i + 1];
-
offset += data_len;
+ va_offset += data_len;
data_left -= data_len;
}
- /*
- * Build isert_conn->tx_desc for iSCSI response PDU and attach
- */
- isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
- iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *)
- &isert_cmd->tx_desc.iscsi_header);
- isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr);
- atomic_inc(&isert_conn->post_send_buf_count);
+ return 0;
+unmap_sg:
+ ib_dma_unmap_sg(ib_dev, sg_start, sg_nents,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ return ret;
+}
- rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
- if (rc) {
- pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
- atomic_dec(&isert_conn->post_send_buf_count);
+static int
+isert_map_fr_pagelist(struct ib_device *ib_dev,
+ struct scatterlist *sg_start, int sg_nents, u64 *fr_pl)
+{
+ u64 start_addr, end_addr, page, chunk_start = 0;
+ struct scatterlist *tmp_sg;
+ int i = 0, new_chunk, last_ent, n_pages;
+
+ n_pages = 0;
+ new_chunk = 1;
+ last_ent = sg_nents - 1;
+ for_each_sg(sg_start, tmp_sg, sg_nents, i) {
+ start_addr = ib_sg_dma_address(ib_dev, tmp_sg);
+ if (new_chunk)
+ chunk_start = start_addr;
+ end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
+
+ pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n",
+ i, (unsigned long long)tmp_sg->dma_address,
+ tmp_sg->length);
+
+ if ((end_addr & ~PAGE_MASK) && i < last_ent) {
+ new_chunk = 0;
+ continue;
+ }
+ new_chunk = 1;
+
+ page = chunk_start & PAGE_MASK;
+ do {
+ fr_pl[n_pages++] = page;
+ pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n",
+ n_pages - 1, page);
+ page += PAGE_SIZE;
+ } while (page < end_addr);
}
- pr_debug("Posted RDMA_WRITE + Response for iSER Data READ\n");
- return 1;
-unmap_sg:
- ib_dma_unmap_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE);
+ return n_pages;
+}
+
+static int
+isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc,
+ struct isert_cmd *isert_cmd, struct isert_conn *isert_conn,
+ struct ib_sge *ib_sge, u32 offset, unsigned int data_len)
+{
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct scatterlist *sg_start;
+ u32 sg_off, page_off;
+ struct ib_send_wr fr_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ u8 key;
+ int ret, sg_nents, pagelist_len;
+
+ sg_off = offset / PAGE_SIZE;
+ sg_start = &cmd->se_cmd.t_data_sg[sg_off];
+ sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off,
+ ISCSI_ISER_SG_TABLESIZE);
+ page_off = offset % PAGE_SIZE;
+
+ pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n",
+ isert_cmd, fr_desc, sg_nents, sg_off, offset);
+
+ pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents,
+ &fr_desc->data_frpl->page_list[0]);
+
+ if (!fr_desc->valid) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(fr_desc->data_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(fr_desc->data_mr, ++key);
+ }
+
+ /* Prepare FASTREG WR */
+ memset(&fr_wr, 0, sizeof(fr_wr));
+ fr_wr.opcode = IB_WR_FAST_REG_MR;
+ fr_wr.wr.fast_reg.iova_start =
+ fr_desc->data_frpl->page_list[0] + page_off;
+ fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl;
+ fr_wr.wr.fast_reg.page_list_len = pagelist_len;
+ fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fr_wr.wr.fast_reg.length = data_len;
+ fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey;
+ fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
+
+ if (!wr)
+ wr = &fr_wr;
+ else
+ wr->next = &fr_wr;
+
+ ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+ if (ret) {
+ pr_err("fast registration failed, ret:%d\n", ret);
+ return ret;
+ }
+ fr_desc->valid = false;
+
+ ib_sge->lkey = fr_desc->data_mr->lkey;
+ ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off;
+ ib_sge->length = data_len;
+
+ pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n",
+ ib_sge->addr, ib_sge->length, ib_sge->lkey);
+
return ret;
}
static int
-isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
+isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr)
{
struct se_cmd *se_cmd = &cmd->se_cmd;
- struct isert_cmd *isert_cmd = container_of(cmd,
- struct isert_cmd, iscsi_cmd);
- struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct ib_send_wr *wr_failed, *send_wr;
- struct ib_sge *ib_sge;
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct ib_send_wr *send_wr;
+ struct ib_sge *ib_sge;
struct scatterlist *sg_start;
- u32 sg_off, sg_nents, page_off, va_offset = 0;
+ struct fast_reg_descriptor *fr_desc;
+ u32 sg_off = 0, sg_nents;
u32 offset = 0, data_len, data_left, rdma_write_max;
- int rc, ret = 0, count, i, ib_sge_cnt;
+ int ret = 0, count;
+ unsigned long flags;
- pr_debug("RDMA_READ: data_length: %u write_data_done: %u\n",
- se_cmd->data_length, cmd->write_data_done);
+ if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+ data_left = se_cmd->data_length;
+ iscsit_increment_maxcmdsn(cmd, conn->sess);
+ cmd->stat_sn = conn->stat_sn++;
+ } else {
+ sg_off = cmd->write_data_done / PAGE_SIZE;
+ data_left = se_cmd->data_length - cmd->write_data_done;
+ offset = cmd->write_data_done;
+ isert_cmd->tx_desc.isert_cmd = isert_cmd;
+ }
- sg_off = cmd->write_data_done / PAGE_SIZE;
sg_start = &cmd->se_cmd.t_data_sg[sg_off];
- page_off = cmd->write_data_done % PAGE_SIZE;
-
- pr_debug("RDMA_READ: sg_off: %d, sg_start: %p page_off: %d\n",
- sg_off, sg_start, page_off);
-
- data_left = se_cmd->data_length - cmd->write_data_done;
sg_nents = se_cmd->t_data_nents - sg_off;
- pr_debug("RDMA_READ: data_left: %d, sg_nents: %d\n",
- data_left, sg_nents);
-
- count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE);
+ count = ib_dma_map_sg(ib_dev, sg_start, sg_nents,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (unlikely(!count)) {
- pr_err("Unable to map get_dataout SGs\n");
+ pr_err("Cmd: %p unrable to map SGs\n", isert_cmd);
return -EINVAL;
}
wr->sge = sg_start;
wr->num_sge = sg_nents;
- pr_debug("Mapped IB count: %u sg_start: %p sg_nents: %u for RDMA_READ\n",
- count, sg_start, sg_nents);
+ pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+ isert_cmd, count, sg_start, sg_nents, data_left);
- ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL);
- if (!ib_sge) {
- pr_warn("Unable to allocate dataout ib_sge\n");
- ret = -ENOMEM;
- goto unmap_sg;
+ memset(&wr->s_ib_sge, 0, sizeof(*ib_sge));
+ ib_sge = &wr->s_ib_sge;
+ wr->ib_sge = ib_sge;
+
+ wr->send_wr_num = 1;
+ memset(&wr->s_send_wr, 0, sizeof(*send_wr));
+ wr->send_wr = &wr->s_send_wr;
+
+ wr->isert_cmd = isert_cmd;
+ rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE;
+
+ send_wr = &isert_cmd->rdma_wr.s_send_wr;
+ send_wr->sg_list = ib_sge;
+ send_wr->num_sge = 1;
+ send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+ if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+ send_wr->opcode = IB_WR_RDMA_WRITE;
+ send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
+ send_wr->wr.rdma.rkey = isert_cmd->read_stag;
+ send_wr->send_flags = 0;
+ send_wr->next = &isert_cmd->tx_desc.send_wr;
+ } else {
+ send_wr->opcode = IB_WR_RDMA_READ;
+ send_wr->wr.rdma.remote_addr = isert_cmd->write_va;
+ send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+ send_wr->send_flags = IB_SEND_SIGNALED;
}
- isert_cmd->ib_sge = ib_sge;
- pr_debug("Using ib_sge: %p from sg_ents: %d for RDMA_READ\n",
- ib_sge, sg_nents);
+ data_len = min(data_left, rdma_write_max);
+ wr->cur_rdma_length = data_len;
- wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge);
- wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
- GFP_KERNEL);
- if (!wr->send_wr) {
- pr_debug("Unable to allocate wr->send_wr\n");
- ret = -ENOMEM;
+ spin_lock_irqsave(&isert_conn->conn_lock, flags);
+ fr_desc = list_first_entry(&isert_conn->conn_frwr_pool,
+ struct fast_reg_descriptor, list);
+ list_del(&fr_desc->list);
+ spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+ wr->fr_desc = fr_desc;
+
+ ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn,
+ ib_sge, offset, data_len);
+ if (ret) {
+ list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
goto unmap_sg;
}
- pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n",
- wr->send_wr, wr->send_wr_num);
- isert_cmd->tx_desc.isert_cmd = isert_cmd;
+ return 0;
- wr->iser_ib_op = ISER_IB_RDMA_READ;
- wr->isert_cmd = isert_cmd;
- rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
- offset = cmd->write_data_done;
+unmap_sg:
+ ib_dma_unmap_sg(ib_dev, sg_start, sg_nents,
+ (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ return ret;
+}
- for (i = 0; i < wr->send_wr_num; i++) {
- send_wr = &isert_cmd->rdma_wr.send_wr[i];
- data_len = min(data_left, rdma_write_max);
+static int
+isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+{
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+ struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_device *device = isert_conn->conn_device;
+ struct ib_send_wr *wr_failed;
+ int rc;
- send_wr->opcode = IB_WR_RDMA_READ;
- send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset;
- send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+ pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n",
+ isert_cmd, se_cmd->data_length);
+ wr->iser_ib_op = ISER_IB_RDMA_WRITE;
+ rc = device->reg_rdma_mem(conn, cmd, wr);
+ if (rc) {
+ pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+ return rc;
+ }
- ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
- send_wr, data_len, offset);
- ib_sge += ib_sge_cnt;
+ /*
+ * Build isert_conn->tx_desc for iSCSI response PDU and attach
+ */
+ isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
+ iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *)
+ &isert_cmd->tx_desc.iscsi_header);
+ isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
+ isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr);
- if (i + 1 == wr->send_wr_num)
- send_wr->send_flags = IB_SEND_SIGNALED;
- else
- send_wr->next = &wr->send_wr[i + 1];
+ atomic_inc(&isert_conn->post_send_buf_count);
- offset += data_len;
- va_offset += data_len;
- data_left -= data_len;
+ rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+ if (rc) {
+ pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
+ atomic_dec(&isert_conn->post_send_buf_count);
+ }
+ pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n",
+ isert_cmd);
+
+ return 1;
+}
+
+static int
+isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
+{
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+ struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_device *device = isert_conn->conn_device;
+ struct ib_send_wr *wr_failed;
+ int rc;
+
+ pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
+ isert_cmd, se_cmd->data_length, cmd->write_data_done);
+ wr->iser_ib_op = ISER_IB_RDMA_READ;
+ rc = device->reg_rdma_mem(conn, cmd, wr);
+ if (rc) {
+ pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+ return rc;
}
atomic_inc(&isert_conn->post_send_buf_count);
@@ -2050,12 +2351,10 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
atomic_dec(&isert_conn->post_send_buf_count);
}
- pr_debug("Posted RDMA_READ memory for ISER Data WRITE\n");
- return 0;
+ pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
+ isert_cmd);
-unmap_sg:
- ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE);
- return ret;
+ return 0;
}
static int
@@ -2224,6 +2523,14 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
int ret;
pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn);
+ /*
+ * For login requests after the first PDU, isert_rx_login_req() will
+ * kick schedule_delayed_work(&conn->login_work) as the packet is
+ * received, which turns this callback from iscsi_target_do_login_rx()
+ * into a NOP.
+ */
+ if (!login->first_request)
+ return 0;
ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
if (ret)
@@ -2393,12 +2700,12 @@ static void isert_free_conn(struct iscsi_conn *conn)
static struct iscsit_transport iser_target_transport = {
.name = "IB/iSER",
.transport_type = ISCSI_INFINIBAND,
+ .priv_size = sizeof(struct isert_cmd),
.owner = THIS_MODULE,
.iscsit_setup_np = isert_setup_np,
.iscsit_accept_np = isert_accept_np,
.iscsit_free_np = isert_free_np,
.iscsit_free_conn = isert_free_conn,
- .iscsit_alloc_cmd = isert_alloc_cmd,
.iscsit_get_login_rx = isert_get_login_rx,
.iscsit_put_login_tx = isert_put_login_tx,
.iscsit_immediate_queue = isert_immediate_queue,
@@ -2425,21 +2732,10 @@ static int __init isert_init(void)
goto destroy_rx_wq;
}
- isert_cmd_cache = kmem_cache_create("isert_cmd_cache",
- sizeof(struct isert_cmd), __alignof__(struct isert_cmd),
- 0, NULL);
- if (!isert_cmd_cache) {
- pr_err("Unable to create isert_cmd_cache\n");
- ret = -ENOMEM;
- goto destroy_tx_cq;
- }
-
iscsit_register_transport(&iser_target_transport);
pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n");
return 0;
-destroy_tx_cq:
- destroy_workqueue(isert_comp_wq);
destroy_rx_wq:
destroy_workqueue(isert_rx_wq);
return ret;
@@ -2447,7 +2743,6 @@ destroy_rx_wq:
static void __exit isert_exit(void)
{
- kmem_cache_destroy(isert_cmd_cache);
destroy_workqueue(isert_comp_wq);
destroy_workqueue(isert_rx_wq);
iscsit_unregister_transport(&iser_target_transport);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 191117b5b50..631f2090f0b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -5,6 +5,7 @@
#include <rdma/rdma_cm.h>
#define ISERT_RDMA_LISTEN_BACKLOG 10
+#define ISCSI_ISER_SG_TABLESIZE 256
enum isert_desc_type {
ISCSI_TX_CONTROL,
@@ -45,15 +46,26 @@ struct iser_tx_desc {
struct ib_send_wr send_wr;
} __packed;
+struct fast_reg_descriptor {
+ struct list_head list;
+ struct ib_mr *data_mr;
+ struct ib_fast_reg_page_list *data_frpl;
+ bool valid;
+};
+
struct isert_rdma_wr {
struct list_head wr_list;
struct isert_cmd *isert_cmd;
enum iser_ib_op_code iser_ib_op;
struct ib_sge *ib_sge;
+ struct ib_sge s_ib_sge;
int num_sge;
struct scatterlist *sge;
int send_wr_num;
struct ib_send_wr *send_wr;
+ struct ib_send_wr s_send_wr;
+ u32 cur_rdma_length;
+ struct fast_reg_descriptor *fr_desc;
};
struct isert_cmd {
@@ -67,8 +79,7 @@ struct isert_cmd {
u32 write_va_off;
u32 rdma_wr_num;
struct isert_conn *conn;
- struct iscsi_cmd iscsi_cmd;
- struct ib_sge *ib_sge;
+ struct iscsi_cmd *iscsi_cmd;
struct iser_tx_desc tx_desc;
struct isert_rdma_wr rdma_wr;
struct work_struct comp_work;
@@ -106,6 +117,10 @@ struct isert_conn {
wait_queue_head_t conn_wait;
wait_queue_head_t conn_wait_comp_err;
struct kref conn_kref;
+ struct list_head conn_frwr_pool;
+ int conn_frwr_pool_size;
+ /* lock to protect frwr_pool */
+ spinlock_t conn_lock;
};
#define ISERT_MAX_CQ 64
@@ -118,6 +133,7 @@ struct isert_cq_desc {
};
struct isert_device {
+ int use_frwr;
int cqs_used;
int refcount;
int cq_active_qps[ISERT_MAX_CQ];
@@ -128,6 +144,12 @@ struct isert_device {
struct ib_cq *dev_tx_cq[ISERT_MAX_CQ];
struct isert_cq_desc *cq_desc;
struct list_head dev_node;
+ struct ib_device_attr dev_attr;
+ int (*reg_rdma_mem)(struct iscsi_conn *conn,
+ struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr);
+ void (*unreg_rdma_mem)(struct isert_cmd *isert_cmd,
+ struct isert_conn *isert_conn);
};
struct isert_np {