diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-05 09:39:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-05 09:39:27 -0700 |
commit | 7c049d086941a74c1babac6dc6901b88e1ce5b7e (patch) | |
tree | eace23c54f30c4bd04c714b4341a6759756187f4 /drivers/infiniband/ulp | |
parent | 00341b5301009ba6f36ee3298e69b649ac5540ff (diff) | |
parent | 82af24ac6fc3f4910218419a0ca4f05d42b45c67 (diff) |
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull main batch of InfiniBand/RDMA changes from Roland Dreier:
- Large ocrdma HW driver update: add "fast register" work requests,
fixes, cleanups
- Add receive flow steering support for raw QPs
- Fix IPoIB neighbour race that leads to crash
- iSER updates including support for using "fast register" memory
registration
- IPv6 support for iWARP
- XRC transport fixes
* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (54 commits)
RDMA/ocrdma: Fix compiler warning about int/pointer size mismatch
IB/iser: Fix redundant pointer check in dealloc flow
IB/iser: Fix possible memory leak in iser_create_frwr_pool()
IB/qib: Move COUNTER_MASK definition within qib_mad.h header guards
RDMA/ocrdma: Fix passing wrong opcode to modify_srq
RDMA/ocrdma: Fill PVID in UMC case
RDMA/ocrdma: Add ABI versioning support
RDMA/ocrdma: Consider multiple SGES in case of DPP
RDMA/ocrdma: Fix for displaying proper link speed
RDMA/ocrdma: Increase STAG array size
RDMA/ocrdma: Dont use PD 0 for userpace CQ DB
RDMA/ocrdma: FRMA code cleanup
RDMA/ocrdma: For ERX2 irrespective of Qid, num_posted offset is 24
RDMA/ocrdma: Fix to work with even a single MSI-X vector
RDMA/ocrdma: Remove the MTU check based on Ethernet MTU
RDMA/ocrdma: Add support for fast register work requests (FRWR)
RDMA/ocrdma: Create IRD queue fix
IB/core: Better checking of userspace values for receive flow steering
IB/mlx4: Add receive flow steering support
IB/core: Export ib_create/destroy_flow through uverbs
...
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 9 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.c | 19 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.h | 73 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 139 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_memory.c | 231 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 292 |
7 files changed, 592 insertions, 174 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 3eceb61e353..7a3175400b2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -817,7 +817,6 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); ipoib_neigh_free(neigh); tx->neigh = NULL; @@ -1234,7 +1233,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); ipoib_neigh_free(neigh); tx->neigh = NULL; @@ -1325,7 +1323,6 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh = p->neigh; if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); ipoib_neigh_free(neigh); } list_del(&p->list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index c6f71a88c55..82cec1af902 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -493,7 +493,6 @@ static void path_rec_completion(int status, path, neigh)); if (!ipoib_cm_get(neigh)) { - list_del(&neigh->list); ipoib_neigh_free(neigh); continue; } @@ -618,7 +617,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, if (!ipoib_cm_get(neigh)) ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); if (!ipoib_cm_get(neigh)) { - list_del(&neigh->list); ipoib_neigh_free(neigh); goto err_drop; } @@ -639,7 +637,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, neigh->ah = NULL; if (!path->query && path_rec_start(dev, path)) - goto err_list; + goto err_path; __skb_queue_tail(&neigh->queue, skb); } @@ -648,9 +646,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, ipoib_neigh_put(neigh); return; -err_list: - list_del(&neigh->list); - err_path: ipoib_neigh_free(neigh); err_drop: @@ -1098,6 +1093,8 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); + /* remove from parent list */ + list_del(&neigh->list); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); return; } else { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1ec78bd8fa1..dd03cfe596d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -347,6 +347,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn; + struct iscsi_session *session; struct iser_conn *ib_conn; struct iscsi_endpoint *ep; int error; @@ -365,7 +366,8 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, } ib_conn = ep->dd_data; - if (iser_alloc_rx_descriptors(ib_conn)) + session = conn->session; + if (iser_alloc_rx_descriptors(ib_conn, session)) return -ENOMEM; /* binds the iSER connection retrieved from the previously @@ -419,12 +421,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct Scsi_Host *shost; - struct iser_conn *ib_conn; + struct iser_conn *ib_conn = NULL; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) return NULL; shost->transportt = iscsi_iser_scsi_transport; + shost->cmd_per_lun = qdepth; shost->max_lun = iscsi_max_lun; shost->max_id = 0; shost->max_channel = 0; @@ -441,12 +444,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, ep ? ib_conn->device->ib_device->dma_device : NULL)) goto free_host; - /* - * we do not support setting can_queue cmd_per_lun from userspace yet - * because we preallocate so many resources - */ + if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) { + iser_info("cmds_max changed from %u to %u\n", + cmds_max, ISER_DEF_XMIT_CMDS_MAX); + cmds_max = ISER_DEF_XMIT_CMDS_MAX; + } + cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, - ISCSI_DEF_XMIT_CMDS_MAX, 0, + cmds_max, 0, sizeof(struct iscsi_iser_task), initial_cmdsn, 0); if (!cls_session) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 4f069c0d4c0..67914027c61 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -78,14 +78,14 @@ #define iser_warn(fmt, arg...) \ do { \ - if (iser_debug_level > 1) \ + if (iser_debug_level > 0) \ pr_warn(PFX "%s:" fmt, \ __func__ , ## arg); \ } while (0) #define iser_info(fmt, arg...) \ do { \ - if (iser_debug_level > 0) \ + if (iser_debug_level > 1) \ pr_info(PFX "%s:" fmt, \ __func__ , ## arg); \ } while (0) @@ -102,7 +102,13 @@ /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) -#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX +#define ISER_DEF_XMIT_CMDS_DEFAULT 512 +#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT + #define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX +#else + #define ISER_DEF_XMIT_CMDS_MAX ISER_DEF_XMIT_CMDS_DEFAULT +#endif +#define ISER_DEF_CMD_PER_LUN ISER_DEF_XMIT_CMDS_MAX /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ @@ -111,9 +117,9 @@ #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * * SCSI_TMFUNC(2), LOGOUT(1) */ -#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX) +#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX) -#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2) +#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2) /* the max TX (send) WR supported by the iSER QP is defined by * * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * @@ -123,7 +129,7 @@ #define ISER_INFLIGHT_DATAOUTS 8 -#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \ +#define ISER_QP_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \ (1 + ISER_INFLIGHT_DATAOUTS) + \ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) @@ -205,7 +211,7 @@ struct iser_mem_reg { u64 va; u64 len; void *mem_h; - int is_fmr; + int is_mr; }; struct iser_regd_buf { @@ -246,6 +252,9 @@ struct iser_rx_desc { #define ISER_MAX_CQ 4 +struct iser_conn; +struct iscsi_iser_task; + struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; @@ -259,6 +268,22 @@ struct iser_device { int cq_active_qps[ISER_MAX_CQ]; int cqs_used; struct iser_cq_desc *cq_desc; + int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn, + unsigned cmds_max); + void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn); + int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); + void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); +}; + +struct fast_reg_descriptor { + struct list_head list; + /* For fast registration - FRWR */ + struct ib_mr *data_mr; + struct ib_fast_reg_page_list *data_frpl; + /* Valid for fast registration flag */ + bool valid; }; struct iser_conn { @@ -270,13 +295,13 @@ struct iser_conn { struct iser_device *device; /* device context */ struct rdma_cm_id *cma_id; /* CMA ID */ struct ib_qp *qp; /* QP */ - struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ wait_queue_head_t wait; /* waitq for conn/disconn */ + unsigned qp_max_recv_dtos; /* num of rx buffers */ + unsigned qp_max_recv_dtos_mask; /* above minus 1 */ + unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ int post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ char name[ISER_OBJECT_NAME_SIZE]; - struct iser_page_vec *page_vec; /* represents SG to fmr maps* - * maps serialized as tx is*/ struct list_head conn_list; /* entry in ig conn list */ char *login_buf; @@ -285,6 +310,17 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; + union { + struct { + struct ib_fmr_pool *pool; /* pool of IB FMRs */ + struct iser_page_vec *page_vec; /* represents SG to fmr maps* + * maps serialized as tx is*/ + } fmr; + struct { + struct list_head pool; + int pool_size; + } frwr; + } fastreg; }; struct iscsi_iser_conn { @@ -368,8 +404,10 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn); void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -380,7 +418,10 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg); -void iser_unreg_mem(struct iser_mem_reg *mem_reg); +void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); +void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -394,5 +435,9 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn); +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); +int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_fmr_pool(struct iser_conn *ib_conn); +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_frwr_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index b31fa1d954a..538822684d5 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -49,6 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, { struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_device *device = iser_task->iser_conn->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -69,7 +70,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, return -EINVAL; } - err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN); + err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; @@ -98,6 +99,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_device *device = iser_task->iser_conn->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -119,7 +121,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, return -EINVAL; } - err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT); + err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; @@ -170,8 +172,78 @@ static void iser_create_send_desc(struct iser_conn *ib_conn, } } +static void iser_free_login_buf(struct iser_conn *ib_conn) +{ + if (!ib_conn->login_buf) + return; + + if (ib_conn->login_req_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + + if (ib_conn->login_resp_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_resp_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + + kfree(ib_conn->login_buf); + + /* make sure we never redo any unmapping */ + ib_conn->login_req_dma = 0; + ib_conn->login_resp_dma = 0; + ib_conn->login_buf = NULL; +} + +static int iser_alloc_login_buf(struct iser_conn *ib_conn) +{ + struct iser_device *device; + int req_err, resp_err; + + BUG_ON(ib_conn->device == NULL); + + device = ib_conn->device; + + ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + + ISER_RX_LOGIN_SIZE, GFP_KERNEL); + if (!ib_conn->login_buf) + goto out_err; + + ib_conn->login_req_buf = ib_conn->login_buf; + ib_conn->login_resp_buf = ib_conn->login_buf + + ISCSI_DEF_MAX_RECV_SEG_LEN; + + ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_req_buf, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + + ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_resp_buf, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + + req_err = ib_dma_mapping_error(device->ib_device, + ib_conn->login_req_dma); + resp_err = ib_dma_mapping_error(device->ib_device, + ib_conn->login_resp_dma); -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) + if (req_err || resp_err) { + if (req_err) + ib_conn->login_req_dma = 0; + if (resp_err) + ib_conn->login_resp_dma = 0; + goto free_login_buf; + } + return 0; + +free_login_buf: + iser_free_login_buf(ib_conn); + +out_err: + iser_err("unable to alloc or map login buf\n"); + return -ENOMEM; +} + +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session) { int i, j; u64 dma_addr; @@ -179,14 +251,24 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) struct ib_sge *rx_sg; struct iser_device *device = ib_conn->device; - ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS * + ib_conn->qp_max_recv_dtos = session->cmds_max; + ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ + ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2; + + if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max)) + goto create_rdma_reg_res_failed; + + if (iser_alloc_login_buf(ib_conn)) + goto alloc_login_buf_fail; + + ib_conn->rx_descs = kmalloc(session->cmds_max * sizeof(struct iser_rx_desc), GFP_KERNEL); if (!ib_conn->rx_descs) goto rx_desc_alloc_fail; rx_desc = ib_conn->rx_descs; - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) { + for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) { dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(device->ib_device, dma_addr)) @@ -207,10 +289,14 @@ rx_desc_dma_map_failed: rx_desc = ib_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->rx_descs); ib_conn->rx_descs = NULL; rx_desc_alloc_fail: + iser_free_login_buf(ib_conn); +alloc_login_buf_fail: + device->iser_free_rdma_reg_res(ib_conn); +create_rdma_reg_res_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; } @@ -222,13 +308,21 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) struct iser_device *device = ib_conn->device; if (!ib_conn->rx_descs) - return; + goto free_login_buf; + + if (device->iser_free_rdma_reg_res) + device->iser_free_rdma_reg_res(ib_conn); rx_desc = ib_conn->rx_descs; - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) + for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->rx_descs); + /* make sure we never redo any unmapping */ + ib_conn->rx_descs = NULL; + +free_login_buf: + iser_free_login_buf(ib_conn); } static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) @@ -254,10 +348,11 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) return 0; } else iser_info("Normal session, posting batch of RX %d buffers\n", - ISER_MIN_POSTED_RX); + iser_conn->ib_conn->min_posted_rx); /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) + if (iser_post_recvm(iser_conn->ib_conn, + iser_conn->ib_conn->min_posted_rx)) return -ENOMEM; return 0; @@ -496,9 +591,9 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, return; outstanding = ib_conn->post_recv_buf_count; - if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) { - count = min(ISER_QP_MAX_RECV_DTOS - outstanding, - ISER_MIN_POSTED_RX); + if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) { + count = min(ib_conn->qp_max_recv_dtos - outstanding, + ib_conn->min_posted_rx); err = iser_post_recvm(ib_conn, count); if (err) iser_err("posting %d rx bufs err %d\n", count, err); @@ -547,8 +642,8 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { + struct iser_device *device = iser_task->iser_conn->ib_conn->device; int is_rdma_aligned = 1; - struct iser_regd_buf *regd; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy @@ -562,17 +657,11 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); } - if (iser_task->dir[ISER_DIR_IN]) { - regd = &iser_task->rdma_regd[ISER_DIR_IN]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); - } + if (iser_task->dir[ISER_DIR_IN]) + device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); - if (iser_task->dir[ISER_DIR_OUT]) { - regd = &iser_task->rdma_regd[ISER_DIR_OUT]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); - } + if (iser_task->dir[ISER_DIR_OUT]) + device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); /* if the data was unaligned, it was already unmapped and then copied */ if (is_rdma_aligned) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 7827baf455a..1ce0c97d2cc 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -170,8 +170,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, */ static int iser_sg_to_page_vec(struct iser_data_buf *data, - struct iser_page_vec *page_vec, - struct ib_device *ibdev) + struct ib_device *ibdev, u64 *pages, + int *offset, int *data_size) { struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf; u64 start_addr, end_addr, page, chunk_start = 0; @@ -180,7 +180,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, int i, new_chunk, cur_page, last_ent = data->dma_nents - 1; /* compute the offset of first element */ - page_vec->offset = (u64) sgl[0].offset & ~MASK_4K; + *offset = (u64) sgl[0].offset & ~MASK_4K; new_chunk = 1; cur_page = 0; @@ -204,13 +204,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, which might be unaligned */ page = chunk_start & MASK_4K; do { - page_vec->pages[cur_page++] = page; + pages[cur_page++] = page; page += SIZE_4K; } while (page < end_addr); } - page_vec->data_size = total_sz; - iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page); + *data_size = total_sz; + iser_dbg("page_vec->data_size:%d cur_page %d\n", + *data_size, cur_page); return cur_page; } @@ -267,11 +268,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data, struct scatterlist *sg; int i; - if (iser_debug_level == 0) - return; - for_each_sg(sgl, sg, data->dma_nents, i) - iser_warn("sg[%d] dma_addr:0x%lX page:0x%p " + iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", i, (unsigned long)ib_sg_dma_address(ibdev, sg), sg_page(sg), sg->offset, @@ -298,8 +296,10 @@ static void iser_page_vec_build(struct iser_data_buf *data, page_vec->offset = 0; iser_dbg("Translating sg sz: %d\n", data->dma_nents); - page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev); - iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len); + page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages, + &page_vec->offset, + &page_vec->data_size); + iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len); page_vec->length = page_vec_len; @@ -347,16 +347,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) } } +static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, + struct ib_device *ibdev, + enum iser_data_dir cmd_dir, + int aligned_len) +{ + struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; + struct iser_data_buf *mem = &iser_task->data[cmd_dir]; + + iscsi_conn->fmr_unalign_cnt++; + iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", + aligned_len, mem->size); + + if (iser_debug_level > 0) + iser_data_buf_dump(mem, ibdev); + + /* unmap the command data before accessing it */ + iser_dma_unmap_task_data(iser_task); + + /* allocate copy buf, if we are writing, copy the */ + /* unaligned scatterlist, dma map the copy */ + if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) + return -ENOMEM; + + return 0; +} + /** - * iser_reg_rdma_mem - Registers memory intended for RDMA, - * obtaining rkey and va + * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA, + * using FMR (if possible) obtaining rkey and va * * returns 0 on success, errno code on failure */ -int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { - struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; @@ -370,20 +395,13 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents || - (!ib_conn->fmr_pool && mem->dma_nents > 1)) { - iscsi_conn->fmr_unalign_cnt++; - iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", - aligned_len, mem->size); - iser_data_buf_dump(mem, ibdev); - - /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task); - - /* allocate copy buf, if we are writing, copy the */ - /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) - return -ENOMEM; + if (aligned_len != mem->dma_nents) { + err = fall_to_bounce_buf(iser_task, ibdev, + cmd_dir, aligned_len); + if (err) { + iser_err("failed to allocate bounce buffer\n"); + return err; + } mem = &iser_task->data_copy[cmd_dir]; } @@ -395,7 +413,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, regd_buf->reg.rkey = device->mr->rkey; regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); - regd_buf->reg.is_fmr = 0; + regd_buf->reg.is_mr = 0; iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " "va: 0x%08lX sz: %ld]\n", @@ -404,22 +422,159 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn->page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); + iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec, + ®d_buf->reg); if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - ib_conn->page_vec->data_size, ib_conn->page_vec->length, - ib_conn->page_vec->offset); - for (i=0 ; i<ib_conn->page_vec->length ; i++) + ib_conn->fastreg.fmr.page_vec->data_size, + ib_conn->fastreg.fmr.page_vec->length, + ib_conn->fastreg.fmr.page_vec->offset); + for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++) iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long) ib_conn->page_vec->pages[i]); + (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]); } if (err) return err; } return 0; } + +static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, + struct iser_conn *ib_conn, + struct iser_regd_buf *regd_buf, + u32 offset, unsigned int data_size, + unsigned int page_list_len) +{ + struct ib_send_wr fastreg_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + u8 key; + int ret; + + if (!desc->valid) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(desc->data_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(desc->data_mr, ++key); + } + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; + fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; + fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; + fastreg_wr.wr.fast_reg.length = data_size; + fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; + fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + + if (!wr) { + wr = &fastreg_wr; + atomic_inc(&ib_conn->post_send_buf_count); + } else { + wr->next = &fastreg_wr; + atomic_add(2, &ib_conn->post_send_buf_count); + } + + ret = ib_post_send(ib_conn->qp, wr, &bad_wr); + if (ret) { + if (bad_wr->next) + atomic_sub(2, &ib_conn->post_send_buf_count); + else + atomic_dec(&ib_conn->post_send_buf_count); + iser_err("fast registration failed, ret:%d\n", ret); + return ret; + } + desc->valid = false; + + regd_buf->reg.mem_h = desc; + regd_buf->reg.lkey = desc->data_mr->lkey; + regd_buf->reg.rkey = desc->data_mr->rkey; + regd_buf->reg.va = desc->data_frpl->page_list[0] + offset; + regd_buf->reg.len = data_size; + regd_buf->reg.is_mr = 1; + + return ret; +} + +/** + * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA, + * using Fast Registration WR (if possible) obtaining rkey and va + * + * returns 0 on success, errno code on failure + */ +int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) +{ + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; + struct ib_device *ibdev = device->ib_device; + struct iser_data_buf *mem = &iser_task->data[cmd_dir]; + struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; + struct fast_reg_descriptor *desc; + unsigned int data_size, page_list_len; + int err, aligned_len; + unsigned long flags; + u32 offset; + + aligned_len = iser_data_buf_aligned_len(mem, ibdev); + if (aligned_len != mem->dma_nents) { + err = fall_to_bounce_buf(iser_task, ibdev, + cmd_dir, aligned_len); + if (err) { + iser_err("failed to allocate bounce buffer\n"); + return err; + } + mem = &iser_task->data_copy[cmd_dir]; + } + + /* if there a single dma entry, dma mr suffices */ + if (mem->dma_nents == 1) { + struct scatterlist *sg = (struct scatterlist *)mem->buf; + + regd_buf->reg.lkey = device->mr->lkey; + regd_buf->reg.rkey = device->mr->rkey; + regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); + regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); + regd_buf->reg.is_mr = 0; + } else { + spin_lock_irqsave(&ib_conn->lock, flags); + desc = list_first_entry(&ib_conn->fastreg.frwr.pool, + struct fast_reg_descriptor, list); + list_del(&desc->list); + spin_unlock_irqrestore(&ib_conn->lock, flags); + page_list_len = iser_sg_to_page_vec(mem, device->ib_device, + desc->data_frpl->page_list, + &offset, &data_size); + + if (page_list_len * SIZE_4K < data_size) { + iser_err("fast reg page_list too short to hold this SG\n"); + err = -EINVAL; + goto err_reg; + } + + err = iser_fast_reg_mr(desc, ib_conn, regd_buf, + offset, data_size, page_list_len); + if (err) + goto err_reg; + } + + return 0; +err_reg: + spin_lock_irqsave(&ib_conn->lock, flags); + list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + spin_unlock_irqrestore(&ib_conn->lock, flags); + return err; +} diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2c4941d0656..afe95674008 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -73,6 +73,36 @@ static int iser_create_device_ib_res(struct iser_device *device) { int i, j; struct iser_cq_desc *cq_desc; + struct ib_device_attr *dev_attr; + + dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); + if (!dev_attr) + return -ENOMEM; + + if (ib_query_device(device->ib_device, dev_attr)) { + pr_warn("Query device failed for %s\n", device->ib_device->name); + goto dev_attr_err; + } + + /* Assign function handles - based on FMR support */ + if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && + device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { + iser_info("FMR supported, using FMR for registration\n"); + device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; + device->iser_free_rdma_reg_res = iser_free_fmr_pool; + device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; + device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; + } else + if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + iser_info("FRWR supported, using FRWR for registration\n"); + device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; + device->iser_free_rdma_reg_res = iser_free_frwr_pool; + device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; + device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; + } else { + iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); + goto dev_attr_err; + } device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); iser_info("using %d CQs, device %s supports %d vectors\n", @@ -128,6 +158,7 @@ static int iser_create_device_ib_res(struct iser_device *device) if (ib_register_event_handler(&device->event_handler)) goto handler_err; + kfree(dev_attr); return 0; handler_err: @@ -147,6 +178,8 @@ pd_err: kfree(device->cq_desc); cq_desc_err: iser_err("failed to allocate an IB resource\n"); +dev_attr_err: + kfree(dev_attr); return -1; } @@ -178,56 +211,23 @@ static void iser_free_device_ib_res(struct iser_device *device) } /** - * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP) + * iser_create_fmr_pool - Creates FMR pool and page_vector * - * returns 0 on success, -1 on failure + * returns 0 on success, or errno code on failure */ -static int iser_create_ib_conn_res(struct iser_conn *ib_conn) +int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) { - struct iser_device *device; - struct ib_qp_init_attr init_attr; - int req_err, resp_err, ret = -ENOMEM; + struct iser_device *device = ib_conn->device; struct ib_fmr_pool_param params; - int index, min_index = 0; - - BUG_ON(ib_conn->device == NULL); - - device = ib_conn->device; - - ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + - ISER_RX_LOGIN_SIZE, GFP_KERNEL); - if (!ib_conn->login_buf) - goto out_err; - - ib_conn->login_req_buf = ib_conn->login_buf; - ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; - - ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_req_buf, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - - ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_resp_buf, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - - req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma); - resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma); - - if (req_err || resp_err) { - if (req_err) - ib_conn->login_req_dma = 0; - if (resp_err) - ib_conn->login_resp_dma = 0; - goto out_err; - } + int ret = -ENOMEM; - ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + - (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), - GFP_KERNEL); - if (!ib_conn->page_vec) - goto out_err; + ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), + GFP_KERNEL); + if (!ib_conn->fastreg.fmr.page_vec) + return ret; - ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); + ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * @@ -235,24 +235,143 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; /* make the pool size twice the max number of SCSI commands * * the ML is expected to queue, watermark for unmap at 50% */ - params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2; - params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX; + params.pool_size = cmds_max * 2; + params.dirty_watermark = cmds_max; params.cache = 0; params.flush_function = NULL; params.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); - ret = PTR_ERR(ib_conn->fmr_pool); - if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) { - ib_conn->fmr_pool = NULL; - goto out_err; - } else if (ret == -ENOSYS) { - ib_conn->fmr_pool = NULL; + ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); + if (!IS_ERR(ib_conn->fastreg.fmr.pool)) + return 0; + + /* no FMR => no need for page_vec */ + kfree(ib_conn->fastreg.fmr.page_vec); + ib_conn->fastreg.fmr.page_vec = NULL; + + ret = PTR_ERR(ib_conn->fastreg.fmr.pool); + ib_conn->fastreg.fmr.pool = NULL; + if (ret != -ENOSYS) { + iser_err("FMR allocation failed, err %d\n", ret); + return ret; + } else { iser_warn("FMRs are not supported, using unaligned mode\n"); - ret = 0; + return 0; } +} + +/** + * iser_free_fmr_pool - releases the FMR pool and page vec + */ +void iser_free_fmr_pool(struct iser_conn *ib_conn) +{ + iser_info("freeing conn %p fmr pool %p\n", + ib_conn, ib_conn->fastreg.fmr.pool); + + if (ib_conn->fastreg.fmr.pool != NULL) + ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); + + ib_conn->fastreg.fmr.pool = NULL; + + kfree(ib_conn->fastreg.fmr.page_vec); + ib_conn->fastreg.fmr.page_vec = NULL; +} + +/** + * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * for fast registration work requests. + * returns 0 on success, or errno code on failure + */ +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +{ + struct iser_device *device = ib_conn->device; + struct fast_reg_descriptor *desc; + int i, ret; + + INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); + ib_conn->fastreg.frwr.pool_size = 0; + for (i = 0; i < cmds_max; i++) { + desc = kmalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + iser_err("Failed to allocate a new fast_reg descriptor\n"); + ret = -ENOMEM; + goto err; + } + + desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_frpl)) { + ret = PTR_ERR(desc->data_frpl); + iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); + goto fast_reg_page_failure; + } + + desc->data_mr = ib_alloc_fast_reg_mr(device->pd, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_mr)) { + ret = PTR_ERR(desc->data_mr); + iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); + goto fast_reg_mr_failure; + } + desc->valid = true; + list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + ib_conn->fastreg.frwr.pool_size++; + } + + return 0; + +fast_reg_mr_failure: + ib_free_fast_reg_page_list(desc->data_frpl); +fast_reg_page_failure: + kfree(desc); +err: + iser_free_frwr_pool(ib_conn); + return ret; +} + +/** + * iser_free_frwr_pool - releases the pool of fast_reg descriptors + */ +void iser_free_frwr_pool(struct iser_conn *ib_conn) +{ + struct fast_reg_descriptor *desc, *tmp; + int i = 0; + + if (list_empty(&ib_conn->fastreg.frwr.pool)) + return; + + iser_info("freeing conn %p frwr pool\n", ib_conn); + + list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { + list_del(&desc->list); + ib_free_fast_reg_page_list(desc->data_frpl); + ib_dereg_mr(desc->data_mr); + kfree(desc); + ++i; + } + + if (i < ib_conn->fastreg.frwr.pool_size) + iser_warn("pool still has %d regions registered\n", + ib_conn->fastreg.frwr.pool_size - i); +} + +/** + * iser_create_ib_conn_res - Queue-Pair (QP) + * + * returns 0 on success, -1 on failure + */ +static int iser_create_ib_conn_res(struct iser_conn *ib_conn) +{ + struct iser_device *device; + struct ib_qp_init_attr init_attr; + int ret = -ENOMEM; + int index, min_index = 0; + + BUG_ON(ib_conn->device == NULL); + + device = ib_conn->device; memset(&init_attr, 0, sizeof init_attr); @@ -282,9 +401,9 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) goto out_err; ib_conn->qp = ib_conn->cma_id->qp; - iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n", + iser_info("setting conn %p cma_id %p qp %p\n", ib_conn, ib_conn->cma_id, - ib_conn->fmr_pool, ib_conn->cma_id->qp); + ib_conn->cma_id->qp); return ret; out_err: @@ -293,7 +412,7 @@ out_err: } /** - * releases the FMR pool and QP objects, returns 0 on success, + * releases the QP objects, returns 0 on success, * -1 on failure */ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) @@ -301,13 +420,11 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) int cq_index; BUG_ON(ib_conn == NULL); - iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n", + iser_info("freeing conn %p cma_id %p qp %p\n", ib_conn, ib_conn->cma_id, - ib_conn->fmr_pool, ib_conn->qp); + ib_conn->qp); /* qp is created only once both addr & route are resolved */ - if (ib_conn->fmr_pool != NULL) - ib_destroy_fmr_pool(ib_conn->fmr_pool); if (ib_conn->qp != NULL) { cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; @@ -316,21 +433,7 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) rdma_destroy_qp(ib_conn->cma_id); } - ib_conn->fmr_pool = NULL; ib_conn->qp = NULL; - kfree(ib_conn->page_vec); - - if (ib_conn->login_buf) { - if (ib_conn->login_req_dma) - ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_req_dma, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (ib_conn->login_resp_dma) - ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_resp_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->login_buf); - } return 0; } @@ -694,7 +797,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, page_list = page_vec->pages; io_addr = page_list[0]; - mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool, + mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, page_list, page_vec->length, io_addr); @@ -709,7 +812,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, mem_reg->rkey = mem->fmr->rkey; mem_reg->len = page_vec->length * SIZE_4K; mem_reg->va = io_addr; - mem_reg->is_fmr = 1; + mem_reg->is_mr = 1; mem_reg->mem_h = (void *)mem; mem_reg->va += page_vec->offset; @@ -727,12 +830,18 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, } /** - * Unregister (previosuly registered) memory. + * Unregister (previosuly registered using FMR) memory. + * If memory is non-FMR does nothing. */ -void iser_unreg_mem(struct iser_mem_reg *reg) +void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { + struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; int ret; + if (!reg->is_mr) + return; + iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); @@ -742,6 +851,23 @@ void iser_unreg_mem(struct iser_mem_reg *reg) reg->mem_h = NULL; } +void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) +{ + struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct fast_reg_descriptor *desc = reg->mem_h; + + if (!reg->is_mr) + return; + + reg->mem_h = NULL; + reg->is_mr = 0; + spin_lock_bh(&ib_conn->lock); + list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + spin_unlock_bh(&ib_conn->lock); +} + int iser_post_recvl(struct iser_conn *ib_conn) { struct ib_recv_wr rx_wr, *rx_wr_failed; @@ -779,7 +905,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count) rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; - my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1); + my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask; } rx_wr--; @@ -863,7 +989,11 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_SEND) iser_snd_completion(tx_desc, ib_conn); - else + else if (wc.opcode == IB_WC_LOCAL_INV || + wc.opcode == IB_WC_FAST_REG_MR) { + atomic_dec(&ib_conn->post_send_buf_count); + continue; + } else iser_err("expected opcode %d got %d\n", IB_WC_SEND, wc.opcode); } else { |