From f91424cf5b0f22a33dea4ccfb0780ee45517b3c8 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 28 Jul 2013 12:35:36 +0300 Subject: IB/iser: Use proper debug level value for info prints Commit 4f363882612 ("IB/iser: Move informational messages from error to info level") set info prints to be emitted at a lower debug level than warning prints, which is a bit odd. Fix that. Also move the prints on unaligned SG from warning to debug level. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 4 ++-- drivers/infiniband/ulp/iser/iser_memory.c | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 4f069c0d4c0..d694bcd479f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -78,14 +78,14 @@ #define iser_warn(fmt, arg...) \ do { \ - if (iser_debug_level > 1) \ + if (iser_debug_level > 0) \ pr_warn(PFX "%s:" fmt, \ __func__ , ## arg); \ } while (0) #define iser_info(fmt, arg...) \ do { \ - if (iser_debug_level > 0) \ + if (iser_debug_level > 1) \ pr_info(PFX "%s:" fmt, \ __func__ , ## arg); \ } while (0) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 7827baf455a..797e49ff7f8 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -267,11 +267,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data, struct scatterlist *sg; int i; - if (iser_debug_level == 0) - return; - for_each_sg(sgl, sg, data->dma_nents, i) - iser_warn("sg[%d] dma_addr:0x%lX page:0x%p " + iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p " "off:0x%x sz:0x%x dma_len:0x%x\n", i, (unsigned long)ib_sg_dma_address(ibdev, sg), sg_page(sg), sg->offset, @@ -373,9 +370,11 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, if (aligned_len != mem->dma_nents || (!ib_conn->fmr_pool && mem->dma_nents > 1)) { iscsi_conn->fmr_unalign_cnt++; - iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", - aligned_len, mem->size); - iser_data_buf_dump(mem, ibdev); + iser_dbg("rdma alignment violation (%d/%d aligned) or FMR not supported\n", + aligned_len, mem->size); + + if (iser_debug_level > 0) + iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ iser_dma_unmap_task_data(iser_task); -- cgit v1.2.3-70-g09d2 From 986db0d6c08125bdf50d8ffdc3b0307aa2871e3e Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Sun, 28 Jul 2013 12:35:37 +0300 Subject: IB/iser: Restructure allocation/deallocation of connection resources This is a preparation step to a patch that accepts the number of max SCSI commands to be supported a session from user space iSCSI tools. Move the allocation of the login buffer, FMR pool and its associated page vector from iser_create_ib_conn_res() (which is called prior when we actually know how many commands should be supported) to iser_alloc_rx_descriptors() (which is called during the iscsi connection bind step where this quantity is known). Also do small refactoring around the deallocation to make that path similar to the allocation one. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 + drivers/infiniband/ulp/iser/iser_initiator.c | 92 ++++++++++++++++++- drivers/infiniband/ulp/iser/iser_verbs.c | 128 +++++++++++++-------------- 3 files changed, 151 insertions(+), 71 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index d694bcd479f..fee8829053e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -395,4 +395,6 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *ib_conn); +int iser_create_fmr_pool(struct iser_conn *ib_conn); +void iser_free_fmr_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index b6d81a86c97..626d950b64a 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -170,6 +170,76 @@ static void iser_create_send_desc(struct iser_conn *ib_conn, } } +static void iser_free_login_buf(struct iser_conn *ib_conn) +{ + if (!ib_conn->login_buf) + return; + + if (ib_conn->login_req_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + + if (ib_conn->login_resp_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_resp_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + + kfree(ib_conn->login_buf); + + /* make sure we never redo any unmapping */ + ib_conn->login_req_dma = 0; + ib_conn->login_resp_dma = 0; + ib_conn->login_buf = NULL; +} + +static int iser_alloc_login_buf(struct iser_conn *ib_conn) +{ + struct iser_device *device; + int req_err, resp_err; + + BUG_ON(ib_conn->device == NULL); + + device = ib_conn->device; + + ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + + ISER_RX_LOGIN_SIZE, GFP_KERNEL); + if (!ib_conn->login_buf) + goto out_err; + + ib_conn->login_req_buf = ib_conn->login_buf; + ib_conn->login_resp_buf = ib_conn->login_buf + + ISCSI_DEF_MAX_RECV_SEG_LEN; + + ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_req_buf, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + + ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_resp_buf, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + + req_err = ib_dma_mapping_error(device->ib_device, + ib_conn->login_req_dma); + resp_err = ib_dma_mapping_error(device->ib_device, + ib_conn->login_resp_dma); + + if (req_err || resp_err) { + if (req_err) + ib_conn->login_req_dma = 0; + if (resp_err) + ib_conn->login_resp_dma = 0; + goto free_login_buf; + } + return 0; + +free_login_buf: + iser_free_login_buf(ib_conn); + +out_err: + iser_err("unable to alloc or map login buf\n"); + return -ENOMEM; +} int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) { @@ -179,6 +249,12 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) struct ib_sge *rx_sg; struct iser_device *device = ib_conn->device; + if (iser_create_fmr_pool(ib_conn)) + goto create_fmr_pool_failed; + + if (iser_alloc_login_buf(ib_conn)) + goto alloc_login_buf_fail; + ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS * sizeof(struct iser_rx_desc), GFP_KERNEL); if (!ib_conn->rx_descs) @@ -207,10 +283,14 @@ rx_desc_dma_map_failed: rx_desc = ib_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->rx_descs); ib_conn->rx_descs = NULL; rx_desc_alloc_fail: + iser_free_login_buf(ib_conn); +alloc_login_buf_fail: + iser_free_fmr_pool(ib_conn); +create_fmr_pool_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; } @@ -222,13 +302,19 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) struct iser_device *device = ib_conn->device; if (!ib_conn->rx_descs) - return; + goto free_login_buf; rx_desc = ib_conn->rx_descs; for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->rx_descs); + /* make sure we never redo any unmapping */ + ib_conn->rx_descs = NULL; + +free_login_buf: + iser_free_login_buf(ib_conn); + iser_free_fmr_pool(ib_conn); } static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 2c4941d0656..b72e349790d 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -178,56 +178,23 @@ static void iser_free_device_ib_res(struct iser_device *device) } /** - * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP) + * iser_create_fmr_pool - Creates FMR pool and page_vector * - * returns 0 on success, -1 on failure + * returns 0 on success, or errno code on failure */ -static int iser_create_ib_conn_res(struct iser_conn *ib_conn) +int iser_create_fmr_pool(struct iser_conn *ib_conn) { - struct iser_device *device; - struct ib_qp_init_attr init_attr; - int req_err, resp_err, ret = -ENOMEM; + struct iser_device *device = ib_conn->device; struct ib_fmr_pool_param params; - int index, min_index = 0; - - BUG_ON(ib_conn->device == NULL); - - device = ib_conn->device; - - ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + - ISER_RX_LOGIN_SIZE, GFP_KERNEL); - if (!ib_conn->login_buf) - goto out_err; - - ib_conn->login_req_buf = ib_conn->login_buf; - ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; - - ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_req_buf, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - - ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_resp_buf, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - - req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma); - resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma); - - if (req_err || resp_err) { - if (req_err) - ib_conn->login_req_dma = 0; - if (resp_err) - ib_conn->login_resp_dma = 0; - goto out_err; - } + int ret = -ENOMEM; ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + - (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE+1)), GFP_KERNEL); if (!ib_conn->page_vec) - goto out_err; + return ret; - ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); + ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1); params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * @@ -244,15 +211,56 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) IB_ACCESS_REMOTE_READ); ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); + if (!IS_ERR(ib_conn->fmr_pool)) + return 0; + + /* no FMR => no need for page_vec */ + kfree(ib_conn->page_vec); + ib_conn->page_vec = NULL; + ret = PTR_ERR(ib_conn->fmr_pool); - if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) { - ib_conn->fmr_pool = NULL; - goto out_err; - } else if (ret == -ENOSYS) { - ib_conn->fmr_pool = NULL; + ib_conn->fmr_pool = NULL; + if (ret != -ENOSYS) { + iser_err("FMR allocation failed, err %d\n", ret); + return ret; + } else { iser_warn("FMRs are not supported, using unaligned mode\n"); - ret = 0; + return 0; } +} + +/** + * iser_free_fmr_pool - releases the FMR pool and page vec + */ +void iser_free_fmr_pool(struct iser_conn *ib_conn) +{ + iser_info("freeing conn %p fmr pool %p\n", + ib_conn, ib_conn->fmr_pool); + + if (ib_conn->fmr_pool != NULL) + ib_destroy_fmr_pool(ib_conn->fmr_pool); + + ib_conn->fmr_pool = NULL; + + kfree(ib_conn->page_vec); + ib_conn->page_vec = NULL; +} + +/** + * iser_create_ib_conn_res - Queue-Pair (QP) + * + * returns 0 on success, -1 on failure + */ +static int iser_create_ib_conn_res(struct iser_conn *ib_conn) +{ + struct iser_device *device; + struct ib_qp_init_attr init_attr; + int ret = -ENOMEM; + int index, min_index = 0; + + BUG_ON(ib_conn->device == NULL); + + device = ib_conn->device; memset(&init_attr, 0, sizeof init_attr); @@ -282,9 +290,9 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) goto out_err; ib_conn->qp = ib_conn->cma_id->qp; - iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n", + iser_info("setting conn %p cma_id %p qp %p\n", ib_conn, ib_conn->cma_id, - ib_conn->fmr_pool, ib_conn->cma_id->qp); + ib_conn->cma_id->qp); return ret; out_err: @@ -293,7 +301,7 @@ out_err: } /** - * releases the FMR pool and QP objects, returns 0 on success, + * releases the QP objects, returns 0 on success, * -1 on failure */ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) @@ -301,13 +309,11 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) int cq_index; BUG_ON(ib_conn == NULL); - iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n", + iser_info("freeing conn %p cma_id %p qp %p\n", ib_conn, ib_conn->cma_id, - ib_conn->fmr_pool, ib_conn->qp); + ib_conn->qp); /* qp is created only once both addr & route are resolved */ - if (ib_conn->fmr_pool != NULL) - ib_destroy_fmr_pool(ib_conn->fmr_pool); if (ib_conn->qp != NULL) { cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; @@ -316,21 +322,7 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) rdma_destroy_qp(ib_conn->cma_id); } - ib_conn->fmr_pool = NULL; ib_conn->qp = NULL; - kfree(ib_conn->page_vec); - - if (ib_conn->login_buf) { - if (ib_conn->login_req_dma) - ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_req_dma, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (ib_conn->login_resp_dma) - ib_dma_unmap_single(ib_conn->device->ib_device, - ib_conn->login_resp_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->login_buf); - } return 0; } -- cgit v1.2.3-70-g09d2 From b7f04513090cf12394de27588a1956d1f97188cb Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Sun, 28 Jul 2013 12:35:38 +0300 Subject: IB/iser: Accept session->cmds_max from user space Use cmds_max passed from user space to be the number of PDUs to be supported for the session instead of hard-coded ISCSI_DEF_XMIT_CMDS_MAX. This allow controlling the max number of SCSI commands for the session. Also don't ignore the qdepth passed from user space. Derive from session->cmds_max the actual number of RX buffers and FMR pool size to allocate during the connection bind phase. Since the iser transport connection is established before the iscsi session/connection are created and bound, we still use one hard-coded quantity ISER_DEF_XMIT_CMDS_MAX to compute the maximum number of work-requests to be supported by the RC QP used for the connection. The above quantity is made to be a power of two between ISCSI_TOTAL_CMDS_MIN (16) and ISER_DEF_XMIT_CMDS_MAX (512) inclusive. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 19 ++++++++++++------- drivers/infiniband/ulp/iser/iscsi_iser.h | 21 +++++++++++++++------ drivers/infiniband/ulp/iser/iser_initiator.c | 25 +++++++++++++++---------- drivers/infiniband/ulp/iser/iser_verbs.c | 8 ++++---- 4 files changed, 46 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 2e84ef859c5..705de7b4020 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -347,6 +347,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_iser_conn *iser_conn; + struct iscsi_session *session; struct iser_conn *ib_conn; struct iscsi_endpoint *ep; int error; @@ -365,7 +366,8 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, } ib_conn = ep->dd_data; - if (iser_alloc_rx_descriptors(ib_conn)) + session = conn->session; + if (iser_alloc_rx_descriptors(ib_conn, session)) return -ENOMEM; /* binds the iSER connection retrieved from the previously @@ -419,12 +421,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct Scsi_Host *shost; - struct iser_conn *ib_conn; + struct iser_conn *ib_conn = NULL; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) return NULL; shost->transportt = iscsi_iser_scsi_transport; + shost->cmd_per_lun = qdepth; shost->max_lun = iscsi_max_lun; shost->max_id = 0; shost->max_channel = 0; @@ -441,12 +444,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, ep ? ib_conn->device->ib_device->dma_device : NULL)) goto free_host; - /* - * we do not support setting can_queue cmd_per_lun from userspace yet - * because we preallocate so many resources - */ + if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) { + iser_info("cmds_max changed from %u to %u\n", + cmds_max, ISER_DEF_XMIT_CMDS_MAX); + cmds_max = ISER_DEF_XMIT_CMDS_MAX; + } + cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, - ISCSI_DEF_XMIT_CMDS_MAX, 0, + cmds_max, 0, sizeof(struct iscsi_iser_task), initial_cmdsn, 0); if (!cls_session) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index fee8829053e..d2fc55a15e9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -102,7 +102,13 @@ /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) -#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX +#define ISER_DEF_XMIT_CMDS_DEFAULT 512 +#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT + #define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX +#else + #define ISER_DEF_XMIT_CMDS_MAX ISER_DEF_XMIT_CMDS_DEFAULT +#endif +#define ISER_DEF_CMD_PER_LUN ISER_DEF_XMIT_CMDS_MAX /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ @@ -111,9 +117,9 @@ #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * * SCSI_TMFUNC(2), LOGOUT(1) */ -#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX) +#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX) -#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2) +#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2) /* the max TX (send) WR supported by the iSER QP is defined by * * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * @@ -123,7 +129,7 @@ #define ISER_INFLIGHT_DATAOUTS 8 -#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \ +#define ISER_QP_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \ (1 + ISER_INFLIGHT_DATAOUTS) + \ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) @@ -272,6 +278,9 @@ struct iser_conn { struct ib_qp *qp; /* QP */ struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ wait_queue_head_t wait; /* waitq for conn/disconn */ + unsigned qp_max_recv_dtos; /* num of rx buffers */ + unsigned qp_max_recv_dtos_mask; /* above minus 1 */ + unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ int post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ char name[ISER_OBJECT_NAME_SIZE]; @@ -394,7 +403,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn); -int iser_create_fmr_pool(struct iser_conn *ib_conn); +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); +int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 626d950b64a..5c2b142840d 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -241,7 +241,7 @@ out_err: return -ENOMEM; } -int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session) { int i, j; u64 dma_addr; @@ -249,20 +249,24 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) struct ib_sge *rx_sg; struct iser_device *device = ib_conn->device; - if (iser_create_fmr_pool(ib_conn)) + ib_conn->qp_max_recv_dtos = session->cmds_max; + ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ + ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2; + + if (iser_create_fmr_pool(ib_conn, session->scsi_cmds_max)) goto create_fmr_pool_failed; if (iser_alloc_login_buf(ib_conn)) goto alloc_login_buf_fail; - ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS * + ib_conn->rx_descs = kmalloc(session->cmds_max * sizeof(struct iser_rx_desc), GFP_KERNEL); if (!ib_conn->rx_descs) goto rx_desc_alloc_fail; rx_desc = ib_conn->rx_descs; - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) { + for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) { dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(device->ib_device, dma_addr)) @@ -305,7 +309,7 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) goto free_login_buf; rx_desc = ib_conn->rx_descs; - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) + for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->rx_descs); @@ -334,9 +338,10 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1); WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); - iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX); + iser_dbg("Initially post: %d\n", iser_conn->ib_conn->min_posted_rx); /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) + if (iser_post_recvm(iser_conn->ib_conn, + iser_conn->ib_conn->min_posted_rx)) return -ENOMEM; return 0; @@ -573,9 +578,9 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, return; outstanding = ib_conn->post_recv_buf_count; - if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) { - count = min(ISER_QP_MAX_RECV_DTOS - outstanding, - ISER_MIN_POSTED_RX); + if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) { + count = min(ib_conn->qp_max_recv_dtos - outstanding, + ib_conn->min_posted_rx); err = iser_post_recvm(ib_conn, count); if (err) iser_err("posting %d rx bufs err %d\n", count, err); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index b72e349790d..5e49a36c727 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -182,7 +182,7 @@ static void iser_free_device_ib_res(struct iser_device *device) * * returns 0 on success, or errno code on failure */ -int iser_create_fmr_pool(struct iser_conn *ib_conn) +int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) { struct iser_device *device = ib_conn->device; struct ib_fmr_pool_param params; @@ -202,8 +202,8 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn) params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; /* make the pool size twice the max number of SCSI commands * * the ML is expected to queue, watermark for unmap at 50% */ - params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2; - params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX; + params.pool_size = cmds_max * 2; + params.dirty_watermark = cmds_max; params.cache = 0; params.flush_function = NULL; params.access = (IB_ACCESS_LOCAL_WRITE | @@ -771,7 +771,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count) rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; - my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1); + my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask; } rx_wr--; -- cgit v1.2.3-70-g09d2 From b4e155ffbbd65cba77207bc5522c7b734a5c8c9d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 28 Jul 2013 12:35:39 +0300 Subject: IB/iser: Generalize rdma memory registration Currently the driver uses FMRs as the only means to register the memory pointed by SG provided by the SCSI mid-layer with the RDMA device. As preparation step for adding more methods for fast path memory registration, make the alloc/free and reg/unreg calls function pointers, which are for now just set to the existing FMR ones. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 13 ++++++++++- drivers/infiniband/ulp/iser/iser_initiator.c | 34 +++++++++++++--------------- drivers/infiniband/ulp/iser/iser_verbs.c | 13 ++++++++++- 3 files changed, 40 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index d2fc55a15e9..78117be0c89 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -252,6 +252,9 @@ struct iser_rx_desc { #define ISER_MAX_CQ 4 +struct iser_conn; +struct iscsi_iser_task; + struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; @@ -265,6 +268,13 @@ struct iser_device { int cq_active_qps[ISER_MAX_CQ]; int cqs_used; struct iser_cq_desc *cq_desc; + int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn, + unsigned cmds_max); + void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn); + int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); + void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); }; struct iser_conn { @@ -389,7 +399,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg); -void iser_unreg_mem(struct iser_mem_reg *mem_reg); +void iser_unreg_mem(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 5c2b142840d..bdc38f423ca 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -49,6 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, { struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_device *device = iser_task->iser_conn->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -69,7 +70,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, return -EINVAL; } - err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN); + err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; @@ -98,6 +99,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_device *device = iser_task->iser_conn->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -119,7 +121,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, return -EINVAL; } - err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT); + err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; @@ -253,8 +255,8 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2; - if (iser_create_fmr_pool(ib_conn, session->scsi_cmds_max)) - goto create_fmr_pool_failed; + if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max)) + goto create_rdma_reg_res_failed; if (iser_alloc_login_buf(ib_conn)) goto alloc_login_buf_fail; @@ -293,8 +295,8 @@ rx_desc_dma_map_failed: rx_desc_alloc_fail: iser_free_login_buf(ib_conn); alloc_login_buf_fail: - iser_free_fmr_pool(ib_conn); -create_fmr_pool_failed: + device->iser_free_rdma_reg_res(ib_conn); +create_rdma_reg_res_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; } @@ -308,6 +310,9 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) if (!ib_conn->rx_descs) goto free_login_buf; + if (device && device->iser_free_rdma_reg_res) + device->iser_free_rdma_reg_res(ib_conn); + rx_desc = ib_conn->rx_descs; for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, @@ -318,7 +323,6 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) free_login_buf: iser_free_login_buf(ib_conn); - iser_free_fmr_pool(ib_conn); } static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) @@ -629,8 +633,8 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { + struct iser_device *device = iser_task->iser_conn->ib_conn->device; int is_rdma_aligned = 1; - struct iser_regd_buf *regd; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy @@ -644,17 +648,11 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); } - if (iser_task->dir[ISER_DIR_IN]) { - regd = &iser_task->rdma_regd[ISER_DIR_IN]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); - } + if (iser_task->dir[ISER_DIR_IN]) + device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); - if (iser_task->dir[ISER_DIR_OUT]) { - regd = &iser_task->rdma_regd[ISER_DIR_OUT]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); - } + if (iser_task->dir[ISER_DIR_OUT]) + device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); /* if the data was unaligned, it was already unmapped and then copied */ if (is_rdma_aligned) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 5e49a36c727..5b5342895c0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -74,6 +74,12 @@ static int iser_create_device_ib_res(struct iser_device *device) int i, j; struct iser_cq_desc *cq_desc; + /* Assign function handles */ + device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; + device->iser_free_rdma_reg_res = iser_free_fmr_pool; + device->iser_reg_rdma_mem = iser_reg_rdma_mem; + device->iser_unreg_rdma_mem = iser_unreg_mem; + device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); iser_info("using %d CQs, device %s supports %d vectors\n", device->cqs_used, device->ib_device->name, @@ -721,10 +727,15 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, /** * Unregister (previosuly registered) memory. */ -void iser_unreg_mem(struct iser_mem_reg *reg) +void iser_unreg_mem(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { + struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; int ret; + if (!reg->is_fmr) + return; + iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); -- cgit v1.2.3-70-g09d2 From 919fc274ef102841ad56a37d482720f332d459d1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 28 Jul 2013 12:35:40 +0300 Subject: IB/iser: Handle unaligned SG in separate function This routine will be shared with other rdma management schemes. The bounce buffer solution for unaligned SG-lists and the sg_to_page_vec routine are likely to be used for other registration schemes and not just FMR. Move them out of the FMR specific code, and call them from there. Later they will be called also from other reg methods code. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 66 ++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 797e49ff7f8..4dea1ba1278 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -170,8 +170,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, */ static int iser_sg_to_page_vec(struct iser_data_buf *data, - struct iser_page_vec *page_vec, - struct ib_device *ibdev) + struct ib_device *ibdev, u64 *pages, + int *offset, int *data_size) { struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf; u64 start_addr, end_addr, page, chunk_start = 0; @@ -180,7 +180,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, int i, new_chunk, cur_page, last_ent = data->dma_nents - 1; /* compute the offset of first element */ - page_vec->offset = (u64) sgl[0].offset & ~MASK_4K; + *offset = (u64) sgl[0].offset & ~MASK_4K; new_chunk = 1; cur_page = 0; @@ -204,13 +204,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, which might be unaligned */ page = chunk_start & MASK_4K; do { - page_vec->pages[cur_page++] = page; + pages[cur_page++] = page; page += SIZE_4K; } while (page < end_addr); } - page_vec->data_size = total_sz; - iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page); + *data_size = total_sz; + iser_dbg("page_vec->data_size:%d cur_page %d\n", + *data_size, cur_page); return cur_page; } @@ -295,8 +296,10 @@ static void iser_page_vec_build(struct iser_data_buf *data, page_vec->offset = 0; iser_dbg("Translating sg sz: %d\n", data->dma_nents); - page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev); - iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len); + page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages, + &page_vec->offset, + &page_vec->data_size); + iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len); page_vec->length = page_vec_len; @@ -344,6 +347,32 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) } } +static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, + struct ib_device *ibdev, + enum iser_data_dir cmd_dir, + int aligned_len) +{ + struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; + struct iser_data_buf *mem = &iser_task->data[cmd_dir]; + + iscsi_conn->fmr_unalign_cnt++; + iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", + aligned_len, mem->size); + + if (iser_debug_level > 0) + iser_data_buf_dump(mem, ibdev); + + /* unmap the command data before accessing it */ + iser_dma_unmap_task_data(iser_task); + + /* allocate copy buf, if we are writing, copy the */ + /* unaligned scatterlist, dma map the copy */ + if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) + return -ENOMEM; + + return 0; +} + /** * iser_reg_rdma_mem - Registers memory intended for RDMA, * obtaining rkey and va @@ -353,7 +382,6 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; @@ -369,20 +397,12 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents || (!ib_conn->fmr_pool && mem->dma_nents > 1)) { - iscsi_conn->fmr_unalign_cnt++; - iser_dbg("rdma alignment violation (%d/%d aligned) or FMR not supported\n", - aligned_len, mem->size); - - if (iser_debug_level > 0) - iser_data_buf_dump(mem, ibdev); - - /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task); - - /* allocate copy buf, if we are writing, copy the */ - /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) - return -ENOMEM; + err = fall_to_bounce_buf(iser_task, ibdev, + cmd_dir, aligned_len); + if (err) { + iser_err("failed to allocate bounce buffer\n"); + return err; + } mem = &iser_task->data_copy[cmd_dir]; } -- cgit v1.2.3-70-g09d2 From e657571b76faf96a1de1aaf40b2111adcf76c673 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 28 Jul 2013 12:35:41 +0300 Subject: IB/iser: Place the fmr pool into a union in iser's IB conn struct This is preparation step for other memory registration methods to be added. In addition, change reg/unreg routines signature to indicate they use FMRs. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 18 +++++++----- drivers/infiniband/ulp/iser/iser_memory.c | 24 ++++++++-------- drivers/infiniband/ulp/iser/iser_verbs.c | 47 ++++++++++++++++--------------- 3 files changed, 48 insertions(+), 41 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 78117be0c89..75c535260e7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -286,7 +286,6 @@ struct iser_conn { struct iser_device *device; /* device context */ struct rdma_cm_id *cma_id; /* CMA ID */ struct ib_qp *qp; /* QP */ - struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ wait_queue_head_t wait; /* waitq for conn/disconn */ unsigned qp_max_recv_dtos; /* num of rx buffers */ unsigned qp_max_recv_dtos_mask; /* above minus 1 */ @@ -294,8 +293,6 @@ struct iser_conn { int post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ char name[ISER_OBJECT_NAME_SIZE]; - struct iser_page_vec *page_vec; /* represents SG to fmr maps* - * maps serialized as tx is*/ struct list_head conn_list; /* entry in ig conn list */ char *login_buf; @@ -304,6 +301,13 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; + union { + struct { + struct ib_fmr_pool *pool; /* pool of IB FMRs */ + struct iser_page_vec *page_vec; /* represents SG to fmr maps* + * maps serialized as tx is*/ + } fmr; + } fastreg; }; struct iscsi_iser_conn { @@ -387,8 +391,8 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn); void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -399,8 +403,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, struct iser_page_vec *page_vec, struct iser_mem_reg *mem_reg); -void iser_unreg_mem(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); +void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 4dea1ba1278..1985e907f03 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -374,13 +374,13 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, } /** - * iser_reg_rdma_mem - Registers memory intended for RDMA, - * obtaining rkey and va + * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA, + * using FMR (if possible) obtaining rkey and va * * returns 0 on success, errno code on failure */ -int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; @@ -396,7 +396,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents || - (!ib_conn->fmr_pool && mem->dma_nents > 1)) { + (!ib_conn->fastreg.fmr.pool && mem->dma_nents > 1)) { err = fall_to_bounce_buf(iser_task, ibdev, cmd_dir, aligned_len); if (err) { @@ -423,19 +423,21 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn->page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); + iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec, + ®d_buf->reg); if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - ib_conn->page_vec->data_size, ib_conn->page_vec->length, - ib_conn->page_vec->offset); - for (i=0 ; ipage_vec->length ; i++) + ib_conn->fastreg.fmr.page_vec->data_size, + ib_conn->fastreg.fmr.page_vec->length, + ib_conn->fastreg.fmr.page_vec->offset); + for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++) iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long) ib_conn->page_vec->pages[i]); + (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]); } if (err) return err; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 5b5342895c0..d9a47b91fe4 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -77,8 +77,8 @@ static int iser_create_device_ib_res(struct iser_device *device) /* Assign function handles */ device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; device->iser_free_rdma_reg_res = iser_free_fmr_pool; - device->iser_reg_rdma_mem = iser_reg_rdma_mem; - device->iser_unreg_rdma_mem = iser_unreg_mem; + device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; + device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); iser_info("using %d CQs, device %s supports %d vectors\n", @@ -194,13 +194,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) struct ib_fmr_pool_param params; int ret = -ENOMEM; - ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + - (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE+1)), - GFP_KERNEL); - if (!ib_conn->page_vec) + ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), + GFP_KERNEL); + if (!ib_conn->fastreg.fmr.page_vec) return ret; - ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1); + ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * @@ -216,16 +216,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); - if (!IS_ERR(ib_conn->fmr_pool)) + ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); + if (!IS_ERR(ib_conn->fastreg.fmr.pool)) return 0; /* no FMR => no need for page_vec */ - kfree(ib_conn->page_vec); - ib_conn->page_vec = NULL; + kfree(ib_conn->fastreg.fmr.page_vec); + ib_conn->fastreg.fmr.page_vec = NULL; - ret = PTR_ERR(ib_conn->fmr_pool); - ib_conn->fmr_pool = NULL; + ret = PTR_ERR(ib_conn->fastreg.fmr.pool); + ib_conn->fastreg.fmr.pool = NULL; if (ret != -ENOSYS) { iser_err("FMR allocation failed, err %d\n", ret); return ret; @@ -241,15 +241,15 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) void iser_free_fmr_pool(struct iser_conn *ib_conn) { iser_info("freeing conn %p fmr pool %p\n", - ib_conn, ib_conn->fmr_pool); + ib_conn, ib_conn->fastreg.fmr.pool); - if (ib_conn->fmr_pool != NULL) - ib_destroy_fmr_pool(ib_conn->fmr_pool); + if (ib_conn->fastreg.fmr.pool != NULL) + ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); - ib_conn->fmr_pool = NULL; + ib_conn->fastreg.fmr.pool = NULL; - kfree(ib_conn->page_vec); - ib_conn->page_vec = NULL; + kfree(ib_conn->fastreg.fmr.page_vec); + ib_conn->fastreg.fmr.page_vec = NULL; } /** @@ -692,7 +692,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, page_list = page_vec->pages; io_addr = page_list[0]; - mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool, + mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, page_list, page_vec->length, io_addr); @@ -725,10 +725,11 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, } /** - * Unregister (previosuly registered) memory. + * Unregister (previosuly registered using FMR) memory. + * If memory is non-FMR does nothing. */ -void iser_unreg_mem(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; int ret; -- cgit v1.2.3-70-g09d2 From 5587856c9659ac2d6ab201141aa8a5c2ff3be4cd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 28 Jul 2013 12:35:42 +0300 Subject: IB/iser: Introduce fast memory registration model (FRWR) Newer HCAs and Virtual functions may not support FMRs but rather a fast registration model, which we call FRWR - "Fast Registration Work Requests". This model was introduced in 00f7ec36c ("RDMA/core: Add memory management extensions support") and works when the IB device supports the IB_DEVICE_MEM_MGT_EXTENSIONS capability. Upon creating the iser device iser will test whether the HCA supports FMRs. If no support for FMRs, check if IB_DEVICE_MEM_MGT_EXTENSIONS is supported and assign function pointers that handle fast registration and allocation of appropriate resources (fast_reg descriptors). Registration is done using posting IB_WR_FAST_REG_MR to the QP and invalidations using posting IB_WR_LOCAL_INV. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 21 ++++- drivers/infiniband/ulp/iser/iser_memory.c | 140 +++++++++++++++++++++++++++++- drivers/infiniband/ulp/iser/iser_verbs.c | 138 +++++++++++++++++++++++++++-- 3 files changed, 287 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 75c535260e7..67914027c61 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -211,7 +211,7 @@ struct iser_mem_reg { u64 va; u64 len; void *mem_h; - int is_fmr; + int is_mr; }; struct iser_regd_buf { @@ -277,6 +277,15 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +struct fast_reg_descriptor { + struct list_head list; + /* For fast registration - FRWR */ + struct ib_mr *data_mr; + struct ib_fast_reg_page_list *data_frpl; + /* Valid for fast registration flag */ + bool valid; +}; + struct iser_conn { struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ struct iscsi_endpoint *ep; @@ -307,6 +316,10 @@ struct iser_conn { struct iser_page_vec *page_vec; /* represents SG to fmr maps* * maps serialized as tx is*/ } fmr; + struct { + struct list_head pool; + int pool_size; + } frwr; } fastreg; }; @@ -393,6 +406,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -405,6 +420,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); +void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -421,4 +438,6 @@ int iser_initialize_task_headers(struct iscsi_task *task, int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_frwr_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 1985e907f03..1ce0c97d2cc 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -395,8 +395,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents || - (!ib_conn->fastreg.fmr.pool && mem->dma_nents > 1)) { + if (aligned_len != mem->dma_nents) { err = fall_to_bounce_buf(iser_task, ibdev, cmd_dir, aligned_len); if (err) { @@ -414,7 +413,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, regd_buf->reg.rkey = device->mr->rkey; regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); - regd_buf->reg.is_fmr = 0; + regd_buf->reg.is_mr = 0; iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " "va: 0x%08lX sz: %ld]\n", @@ -444,3 +443,138 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, } return 0; } + +static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, + struct iser_conn *ib_conn, + struct iser_regd_buf *regd_buf, + u32 offset, unsigned int data_size, + unsigned int page_list_len) +{ + struct ib_send_wr fastreg_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + u8 key; + int ret; + + if (!desc->valid) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(desc->data_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(desc->data_mr, ++key); + } + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; + fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; + fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; + fastreg_wr.wr.fast_reg.length = data_size; + fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; + fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + + if (!wr) { + wr = &fastreg_wr; + atomic_inc(&ib_conn->post_send_buf_count); + } else { + wr->next = &fastreg_wr; + atomic_add(2, &ib_conn->post_send_buf_count); + } + + ret = ib_post_send(ib_conn->qp, wr, &bad_wr); + if (ret) { + if (bad_wr->next) + atomic_sub(2, &ib_conn->post_send_buf_count); + else + atomic_dec(&ib_conn->post_send_buf_count); + iser_err("fast registration failed, ret:%d\n", ret); + return ret; + } + desc->valid = false; + + regd_buf->reg.mem_h = desc; + regd_buf->reg.lkey = desc->data_mr->lkey; + regd_buf->reg.rkey = desc->data_mr->rkey; + regd_buf->reg.va = desc->data_frpl->page_list[0] + offset; + regd_buf->reg.len = data_size; + regd_buf->reg.is_mr = 1; + + return ret; +} + +/** + * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA, + * using Fast Registration WR (if possible) obtaining rkey and va + * + * returns 0 on success, errno code on failure + */ +int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) +{ + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; + struct ib_device *ibdev = device->ib_device; + struct iser_data_buf *mem = &iser_task->data[cmd_dir]; + struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; + struct fast_reg_descriptor *desc; + unsigned int data_size, page_list_len; + int err, aligned_len; + unsigned long flags; + u32 offset; + + aligned_len = iser_data_buf_aligned_len(mem, ibdev); + if (aligned_len != mem->dma_nents) { + err = fall_to_bounce_buf(iser_task, ibdev, + cmd_dir, aligned_len); + if (err) { + iser_err("failed to allocate bounce buffer\n"); + return err; + } + mem = &iser_task->data_copy[cmd_dir]; + } + + /* if there a single dma entry, dma mr suffices */ + if (mem->dma_nents == 1) { + struct scatterlist *sg = (struct scatterlist *)mem->buf; + + regd_buf->reg.lkey = device->mr->lkey; + regd_buf->reg.rkey = device->mr->rkey; + regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); + regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); + regd_buf->reg.is_mr = 0; + } else { + spin_lock_irqsave(&ib_conn->lock, flags); + desc = list_first_entry(&ib_conn->fastreg.frwr.pool, + struct fast_reg_descriptor, list); + list_del(&desc->list); + spin_unlock_irqrestore(&ib_conn->lock, flags); + page_list_len = iser_sg_to_page_vec(mem, device->ib_device, + desc->data_frpl->page_list, + &offset, &data_size); + + if (page_list_len * SIZE_4K < data_size) { + iser_err("fast reg page_list too short to hold this SG\n"); + err = -EINVAL; + goto err_reg; + } + + err = iser_fast_reg_mr(desc, ib_conn, regd_buf, + offset, data_size, page_list_len); + if (err) + goto err_reg; + } + + return 0; +err_reg: + spin_lock_irqsave(&ib_conn->lock, flags); + list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + spin_unlock_irqrestore(&ib_conn->lock, flags); + return err; +} diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index d9a47b91fe4..28badacb013 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -73,12 +73,36 @@ static int iser_create_device_ib_res(struct iser_device *device) { int i, j; struct iser_cq_desc *cq_desc; + struct ib_device_attr *dev_attr; - /* Assign function handles */ - device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; - device->iser_free_rdma_reg_res = iser_free_fmr_pool; - device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; - device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; + dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); + if (!dev_attr) + return -ENOMEM; + + if (ib_query_device(device->ib_device, dev_attr)) { + pr_warn("Query device failed for %s\n", device->ib_device->name); + goto dev_attr_err; + } + + /* Assign function handles - based on FMR support */ + if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && + device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { + iser_info("FMR supported, using FMR for registration\n"); + device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; + device->iser_free_rdma_reg_res = iser_free_fmr_pool; + device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; + device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; + } else + if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + iser_info("FRWR supported, using FRWR for registration\n"); + device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; + device->iser_free_rdma_reg_res = iser_free_frwr_pool; + device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; + device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; + } else { + iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); + goto dev_attr_err; + } device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); iser_info("using %d CQs, device %s supports %d vectors\n", @@ -134,6 +158,7 @@ static int iser_create_device_ib_res(struct iser_device *device) if (ib_register_event_handler(&device->event_handler)) goto handler_err; + kfree(dev_attr); return 0; handler_err: @@ -153,6 +178,8 @@ pd_err: kfree(device->cq_desc); cq_desc_err: iser_err("failed to allocate an IB resource\n"); +dev_attr_err: + kfree(dev_attr); return -1; } @@ -252,6 +279,80 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn) ib_conn->fastreg.fmr.page_vec = NULL; } +/** + * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * for fast registration work requests. + * returns 0 on success, or errno code on failure + */ +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +{ + struct iser_device *device = ib_conn->device; + struct fast_reg_descriptor *desc; + int i, ret; + + INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); + ib_conn->fastreg.frwr.pool_size = 0; + for (i = 0; i < cmds_max; i++) { + desc = kmalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + iser_err("Failed to allocate a new fast_reg descriptor\n"); + ret = -ENOMEM; + goto err; + } + + desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_frpl)) { + ret = PTR_ERR(desc->data_frpl); + iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); + goto err; + } + + desc->data_mr = ib_alloc_fast_reg_mr(device->pd, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_mr)) { + ret = PTR_ERR(desc->data_mr); + iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); + ib_free_fast_reg_page_list(desc->data_frpl); + goto err; + } + desc->valid = true; + list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + ib_conn->fastreg.frwr.pool_size++; + } + + return 0; +err: + iser_free_frwr_pool(ib_conn); + return ret; +} + +/** + * iser_free_frwr_pool - releases the pool of fast_reg descriptors + */ +void iser_free_frwr_pool(struct iser_conn *ib_conn) +{ + struct fast_reg_descriptor *desc, *tmp; + int i = 0; + + if (list_empty(&ib_conn->fastreg.frwr.pool)) + return; + + iser_info("freeing conn %p frwr pool\n", ib_conn); + + list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { + list_del(&desc->list); + ib_free_fast_reg_page_list(desc->data_frpl); + ib_dereg_mr(desc->data_mr); + kfree(desc); + ++i; + } + + if (i < ib_conn->fastreg.frwr.pool_size) + iser_warn("pool still has %d regions registered\n", + ib_conn->fastreg.frwr.pool_size - i); +} + /** * iser_create_ib_conn_res - Queue-Pair (QP) * @@ -707,7 +808,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, mem_reg->rkey = mem->fmr->rkey; mem_reg->len = page_vec->length * SIZE_4K; mem_reg->va = io_addr; - mem_reg->is_fmr = 1; + mem_reg->is_mr = 1; mem_reg->mem_h = (void *)mem; mem_reg->va += page_vec->offset; @@ -734,7 +835,7 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; int ret; - if (!reg->is_fmr) + if (!reg->is_mr) return; iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); @@ -746,6 +847,23 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, reg->mem_h = NULL; } +void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) +{ + struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct fast_reg_descriptor *desc = reg->mem_h; + + if (!reg->is_mr) + return; + + reg->mem_h = NULL; + reg->is_mr = 0; + spin_lock_bh(&ib_conn->lock); + list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + spin_unlock_bh(&ib_conn->lock); +} + int iser_post_recvl(struct iser_conn *ib_conn) { struct ib_recv_wr rx_wr, *rx_wr_failed; @@ -867,7 +985,11 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_SEND) iser_snd_completion(tx_desc, ib_conn); - else + else if (wc.opcode == IB_WC_LOCAL_INV || + wc.opcode == IB_WC_FAST_REG_MR) { + atomic_dec(&ib_conn->post_send_buf_count); + continue; + } else iser_err("expected opcode %d got %d\n", IB_WC_SEND, wc.opcode); } else { -- cgit v1.2.3-70-g09d2 From f99b1649dbb6342d618307faef1f214fd54928b9 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Wed, 7 Aug 2013 12:52:32 +0530 Subject: RDMA/ocrdma: Style and redundant code cleanup Code cleanup and remove redundant code: 1) redundant initialization removed 2) braces changed as per CodingStyle. 3) redundant checks removed 4) extra braces in return statements removed. 5) removed unused pd pointer from mr. 6) reorganized get_dma_mr() 7) fixed set_av() to return error on invalid sgid index. 8) reference to ocrdma_dev removed from struct ocrdma_pd. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 3 +- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 4 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 75 ++++++++++---------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 104 +++++++++++++++------------- 4 files changed, 96 insertions(+), 90 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index d540180a8e4..5c00600135e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -317,7 +317,6 @@ struct ocrdma_mr { struct ib_mr ibmr; struct ib_umem *umem; struct ocrdma_hw_mr hwmr; - struct ocrdma_pd *pd; }; struct ocrdma_ucontext { @@ -393,7 +392,7 @@ static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) { int cqe_valid; cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID; - return ((cqe_valid == cq->phase) ? 1 : 0); + return (cqe_valid == cq->phase); } static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index f4c587c68f6..a6bb3d074d2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -92,7 +92,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) int status; struct ocrdma_ah *ah; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = pd->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); @@ -100,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) ah = kzalloc(sizeof *ah, GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); - ah->dev = pd->dev; + ah->dev = dev; status = ocrdma_alloc_av(dev, ah); if (status) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 0965278dd2e..eb41a1c9ad6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -94,7 +94,7 @@ enum cqe_status { static inline void *ocrdma_get_eqe(struct ocrdma_eq *eq) { - return (u8 *)eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe)); + return eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe)); } static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq) @@ -105,8 +105,7 @@ static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq) static inline void *ocrdma_get_mcqe(struct ocrdma_dev *dev) { struct ocrdma_mcqe *cqe = (struct ocrdma_mcqe *) - ((u8 *) dev->mq.cq.va + - (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe))); + (dev->mq.cq.va + (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe))); if (!(le32_to_cpu(cqe->valid_ae_cmpl_cons) & OCRDMA_MCQE_VALID_MASK)) return NULL; @@ -120,9 +119,7 @@ static inline void ocrdma_mcq_inc_tail(struct ocrdma_dev *dev) static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev) { - return (struct ocrdma_mqe *)((u8 *) dev->mq.sq.va + - (dev->mq.sq.head * - sizeof(struct ocrdma_mqe))); + return dev->mq.sq.va + (dev->mq.sq.head * sizeof(struct ocrdma_mqe)); } static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev) @@ -132,8 +129,7 @@ static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev) static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev) { - return (void *)((u8 *) dev->mq.sq.va + - (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe))); + return dev->mq.sq.va + (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe)); } enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps) @@ -181,7 +177,7 @@ static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps) static int ocrdma_get_mbx_errno(u32 status) { - int err_num = -EFAULT; + int err_num; u8 mbox_status = (status & OCRDMA_MBX_RSP_STATUS_MASK) >> OCRDMA_MBX_RSP_STATUS_SHIFT; u8 add_status = (status & OCRDMA_MBX_RSP_ASTATUS_MASK) >> @@ -438,9 +434,9 @@ static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) NULL); if (!status) { eq->q.id = rsp->vector_eqid & 0xffff; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) + if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { ocrdma_assign_eq_vect_gen2(dev, eq); - else { + } else { eq->vector = (rsp->vector_eqid >> 16) & 0xffff; dev->nic_info.msix.start_vector += 1; } @@ -746,8 +742,9 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, qp->srq->ibsrq.event_handler(&ib_evt, qp->srq->ibsrq. srq_context); - } else if (dev_event) + } else if (dev_event) { ib_dispatch_event(&ib_evt); + } } @@ -957,9 +954,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) rsp = ocrdma_get_mqe_rsp(dev); ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); if (cqe_status || ext_status) { - pr_err - ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", - __func__, + pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", + __func__, (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status); status = ocrdma_get_mbx_cqe_errno(cqe_status); @@ -1377,15 +1373,13 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cmd->cmd.pgsz_pgcnt |= hw_pages; cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; - if (dev->eq_cnt < 0) - goto eq_err; cq->eqn = ocrdma_bind_eq(dev); cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2; cqe_count = cq->len / cqe_size; - if (cqe_count > 1024) + if (cqe_count > 1024) { /* Set cnt to 3 to indicate more than 1024 cq entries */ cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT); - else { + } else { u8 count = 0; switch (cqe_count) { case 256: @@ -1427,7 +1421,6 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, return 0; mbx_err: ocrdma_unbind_eq(dev, cq->eqn); -eq_err: dma_free_coherent(&pdev->dev, cq->len, cq->va, cq->pa); mem_err: kfree(cmd); @@ -2057,9 +2050,10 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, qp->rq_cq = cq; if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp && - (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) + (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) { ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, dpp_cq_id); + } status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (status) @@ -2108,27 +2102,28 @@ int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid, struct in6_addr in6; memcpy(&in6, dgid, sizeof in6); - if (rdma_is_multicast_addr(&in6)) + if (rdma_is_multicast_addr(&in6)) { rdma_get_mcast_mac(&in6, mac_addr); - else if (rdma_link_local_addr(&in6)) + } else if (rdma_link_local_addr(&in6)) { rdma_get_ll_mac(&in6, mac_addr); - else { + } else { pr_err("%s() fail to resolve mac_addr.\n", __func__); return -EINVAL; } return 0; } -static void ocrdma_set_av_params(struct ocrdma_qp *qp, +static int ocrdma_set_av_params(struct ocrdma_qp *qp, struct ocrdma_modify_qp *cmd, struct ib_qp_attr *attrs) { + int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; union ib_gid sgid; u32 vlan_id; u8 mac_addr[6]; if ((ah_attr->ah_flags & IB_AH_GRH) == 0) - return; + return -EINVAL; cmd->params.tclass_sq_psn |= (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT); cmd->params.rnt_rc_sl_fl |= @@ -2138,8 +2133,10 @@ static void ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); - ocrdma_query_gid(&qp->dev->ibdev, 1, + status = ocrdma_query_gid(&qp->dev->ibdev, 1, ah_attr->grh.sgid_index, &sgid); + if (status) + return status; qp->sgid_idx = ah_attr->grh.sgid_index; memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]); @@ -2155,6 +2152,7 @@ static void ocrdma_set_av_params(struct ocrdma_qp *qp, vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; } + return 0; } static int ocrdma_set_qp_params(struct ocrdma_qp *qp, @@ -2176,9 +2174,11 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, cmd->params.qkey = attrs->qkey; cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID; } - if (attr_mask & IB_QP_AV) - ocrdma_set_av_params(qp, cmd, attrs); - else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) { + if (attr_mask & IB_QP_AV) { + status = ocrdma_set_av_params(qp, cmd, attrs); + if (status) + return status; + } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) { /* set the default mac address for UD, GSI QPs */ cmd->params.dmac_b0_to_b3 = qp->dev->nic_info.mac_addr[0] | (qp->dev->nic_info.mac_addr[1] << 8) | @@ -2283,10 +2283,12 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, OCRDMA_QP_PARAMS_STATE_SHIFT) & OCRDMA_QP_PARAMS_STATE_MASK; cmd->flags |= OCRDMA_QP_PARA_QPS_VALID; - } else + } else { cmd->params.max_sge_recv_flags |= (qp->state << OCRDMA_QP_PARAMS_STATE_SHIFT) & OCRDMA_QP_PARAMS_STATE_MASK; + } + status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps); if (status) goto mbx_err; @@ -2497,9 +2499,9 @@ static int ocrdma_create_mq_eq(struct ocrdma_dev *dev) unsigned long flags = 0; int num_eq = 0; - if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) + if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) { flags = IRQF_SHARED; - else { + } else { num_eq = dev->nic_info.msix.num_vectors - dev->nic_info.msix.start_vector; /* minimum two vectors/eq are required for rdma to work. @@ -2532,8 +2534,10 @@ static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev) if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) { num_eq = 1; flags = IRQF_SHARED; - } else + } else { num_eq = min_t(u32, num_eq, num_online_cpus()); + } + dev->qp_eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL); if (!dev->qp_eq_tbl) return -ENOMEM; @@ -2561,8 +2565,7 @@ static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev) /* one eq is sufficient for data path to work */ if (dev->eq_cnt >= 1) return 0; - if (status) - ocrdma_destroy_qp_eqs(dev); + ocrdma_destroy_qp_eqs(dev); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index f36630e4b6b..77fc50a5cc9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -337,20 +337,21 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd, u32 db_page_size; struct ocrdma_alloc_pd_uresp rsp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); memset(&rsp, 0, sizeof(rsp)); rsp.id = pd->id; rsp.dpp_enabled = pd->dpp_enabled; - db_page_addr = pd->dev->nic_info.unmapped_db + - (pd->id * pd->dev->nic_info.db_page_size); - db_page_size = pd->dev->nic_info.db_page_size; + db_page_addr = dev->nic_info.unmapped_db + + (pd->id * dev->nic_info.db_page_size); + db_page_size = dev->nic_info.db_page_size; status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size); if (status) return status; if (pd->dpp_enabled) { - dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr + + dpp_page_addr = dev->nic_info.dpp_unmapped_addr + (pd->id * OCRDMA_DPP_PAGE_SIZE); status = ocrdma_add_mmap(uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE); @@ -386,10 +387,9 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); - pd->dev = dev; if (udata && context) { - pd->dpp_enabled = (dev->nic_info.dev_family == - OCRDMA_GEN2_FAMILY) ? true : false; + pd->dpp_enabled = + (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY); pd->num_dpp_qp = pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; } @@ -414,7 +414,7 @@ err: int ocrdma_dealloc_pd(struct ib_pd *ibpd) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = pd->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); int status; u64 usr_db; @@ -432,25 +432,12 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) return status; } -static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd, - int acc, u32 num_pbls, - u32 addr_check) +static int ocrdma_alloc_lkey(struct ocrdma_mr *mr, u32 pdid, int acc, + u32 num_pbls, u32 addr_check) { int status; - struct ocrdma_mr *mr; - struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = pd->dev; - - if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { - pr_err("%s(%d) leaving err, invalid access rights\n", - __func__, dev->id); - return ERR_PTR(-EINVAL); - } + struct ocrdma_dev *dev = mr->hwmr.dev; - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); - mr->hwmr.dev = dev; mr->hwmr.fr_mr = 0; mr->hwmr.local_rd = 1; mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; @@ -460,25 +447,39 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd, mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; mr->hwmr.num_pbls = num_pbls; - status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check); - if (status) { - kfree(mr); - return ERR_PTR(-ENOMEM); - } - mr->pd = pd; + status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check); + if (status) + return status; + mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; - return mr; + return 0; } struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc) { + int status; struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { + pr_err("%s err, invalid access rights\n", __func__); + return ERR_PTR(-EINVAL); + } - mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE); - if (IS_ERR(mr)) - return ERR_CAST(mr); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->hwmr.dev = dev; + status = ocrdma_alloc_lkey(mr, pd->id, acc, 0, + OCRDMA_ADDR_CHECK_DISABLE); + if (status) { + kfree(mr); + return ERR_PTR(status); + } return &mr->ibmr; } @@ -613,13 +614,12 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, u64 usr_addr, int acc, struct ib_udata *udata) { int status = -ENOMEM; - struct ocrdma_dev *dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_mr *mr; struct ocrdma_pd *pd; u32 num_pbes; pd = get_ocrdma_pd(ibpd); - dev = pd->dev; if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) return ERR_PTR(-EINVAL); @@ -654,7 +654,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); if (status) goto mbx_err; - mr->pd = pd; mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; @@ -1026,7 +1025,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, int status; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_qp *qp; - struct ocrdma_dev *dev = pd->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; @@ -1360,17 +1359,18 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) */ discard_cnt += 1; cqe->cmn.qpn = 0; - if (is_cqe_for_sq(cqe)) + if (is_cqe_for_sq(cqe)) { ocrdma_hwq_inc_tail(&qp->sq); - else { + } else { if (qp->srq) { spin_lock_irqsave(&qp->srq->q_lock, flags); ocrdma_hwq_inc_tail(&qp->srq->rq); ocrdma_srq_toggle_bit(qp->srq, cur_getp); spin_unlock_irqrestore(&qp->srq->q_lock, flags); - } else + } else { ocrdma_hwq_inc_tail(&qp->rq); + } } skip_cqe: cur_getp = (cur_getp + 1) % cq->max_hw_cqe; @@ -1495,7 +1495,7 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, { int status = -ENOMEM; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); - struct ocrdma_dev *dev = pd->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_srq *srq; if (init_attr->attr.max_sge > dev->attr.max_recv_sge) @@ -1675,8 +1675,9 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, ocrdma_build_ud_hdr(qp, hdr, wr); sge = (struct ocrdma_sge *)(hdr + 2); wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr); - } else + } else { sge = (struct ocrdma_sge *)(hdr + 1); + } status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); return status; @@ -1958,7 +1959,7 @@ int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, static enum ib_wc_status ocrdma_to_ibwc_err(u16 status) { - enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR; + enum ib_wc_status ibwc_status; switch (status) { case OCRDMA_CQE_GENERAL_ERR: @@ -2299,9 +2300,9 @@ static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp, ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt); ibwc->wc_flags |= IB_WC_WITH_INVALIDATE; } - if (qp->ibqp.srq) + if (qp->ibqp.srq) { ocrdma_update_free_srq_cqe(ibwc, cqe, qp); - else { + } else { ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; ocrdma_hwq_inc_tail(&qp->rq); } @@ -2314,13 +2315,14 @@ static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, bool expand = false; ibwc->wc_flags = 0; - if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) + if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) { status = (le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; - else + } else { status = (le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + } if (status == OCRDMA_CQE_SUCCESS) { *polled = true; @@ -2338,9 +2340,10 @@ static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe, if (cq->phase_change) { if (cur_getp == 0) cq->phase = (~cq->phase & OCRDMA_CQE_VALID); - } else + } else { /* clear valid bit */ cqe->flags_status_srcqpn = 0; + } } static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries, @@ -2417,8 +2420,9 @@ static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries, } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) { ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; ocrdma_hwq_inc_tail(&qp->rq); - } else + } else { return err_cqes; + } ibwc->byte_len = 0; ibwc->status = IB_WC_WR_FLUSH_ERR; ibwc = ibwc + 1; -- cgit v1.2.3-70-g09d2 From 1afc0454b6658ad2d0a87e594e1f06dc19c6977d Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Wed, 7 Aug 2013 12:52:33 +0530 Subject: RDMA/ocrdma: Remove redundant dev reference Remove redundant dev reference from structures: 1) ocrdma_cq. 2) ocrdma_ah. 3) ocrdma_hw_mr. 4) ocrdma_mw. 5) ocrdma_srq. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 5 -- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 10 ++-- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 26 ++++++---- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 3 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 76 +++++++++++++---------------- 5 files changed, 57 insertions(+), 63 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 5c00600135e..8d54dc74ca7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -172,7 +172,6 @@ struct ocrdma_dev { struct ocrdma_cq { struct ib_cq ibcq; - struct ocrdma_dev *dev; struct ocrdma_cqe *va; u32 phase; u32 getp; /* pointer to pending wrs to @@ -214,7 +213,6 @@ struct ocrdma_pd { struct ocrdma_ah { struct ib_ah ibah; - struct ocrdma_dev *dev; struct ocrdma_av *av; u16 sgid_index; u32 id; @@ -234,7 +232,6 @@ struct ocrdma_qp_hwq_info { struct ocrdma_srq { struct ib_srq ibsrq; - struct ocrdma_dev *dev; u8 __iomem *db; struct ocrdma_qp_hwq_info rq; u64 *rqe_wr_id_tbl; @@ -293,7 +290,6 @@ struct ocrdma_qp { }; struct ocrdma_hw_mr { - struct ocrdma_dev *dev; u32 lkey; u8 fr_mr; u8 remote_atomic; @@ -321,7 +317,6 @@ struct ocrdma_mr { struct ocrdma_ucontext { struct ib_ucontext ibucontext; - struct ocrdma_dev *dev; struct list_head mm_head; struct mutex mm_list_lock; /* protects list entries of mm type */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index a6bb3d074d2..df9e73758af 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -35,12 +35,11 @@ #include "ocrdma_ah.h" #include "ocrdma_hw.h" -static inline int set_av_attr(struct ocrdma_ah *ah, +static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, int pdid) { int status = 0; u16 vlan_tag; bool vlan_enabled = false; - struct ocrdma_dev *dev = ah->dev; struct ocrdma_eth_vlan eth; struct ocrdma_grh grh; int eth_sz; @@ -100,12 +99,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) ah = kzalloc(sizeof *ah, GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); - ah->dev = dev; status = ocrdma_alloc_av(dev, ah); if (status) goto av_err; - status = set_av_attr(ah, attr, pd->id); + status = set_av_attr(dev, ah, attr, pd->id); if (status) goto av_conf_err; @@ -126,7 +124,9 @@ av_err: int ocrdma_destroy_ah(struct ib_ah *ibah) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); - ocrdma_free_av(ah->dev, ah); + struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); + + ocrdma_free_av(dev, ah); kfree(ah); return 0; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index eb41a1c9ad6..6bbcc786ef6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -523,16 +523,21 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev, ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_CQ, OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); - cmd->pgsz_pgcnt = PAGES_4K_SPANNED(cq->va, cq->size); + cmd->req.rsvd_version = OCRDMA_CREATE_CQ_VER2; + cmd->pgsz_pgcnt = (cq->size / OCRDMA_MIN_Q_PAGE_SIZE) << + OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT; + cmd->pgsz_pgcnt |= PAGES_4K_SPANNED(cq->va, cq->size); + cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; - cmd->eqn = (eq->id << OCRDMA_CREATE_CQ_EQID_SHIFT); + cmd->eqn = eq->id; + cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe); - ocrdma_build_q_pages(&cmd->pa[0], cmd->pgsz_pgcnt, + ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE, cq->dma, PAGE_SIZE_4K); status = be_roce_mcc_cmd(dev->nic_info.netdev, cmd, sizeof(*cmd), NULL, NULL); if (!status) { - cq->id = (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK); + cq->id = (u16) (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK); cq->created = true; } return status; @@ -2326,7 +2331,7 @@ mbx_err: return status; } -int ocrdma_mbx_create_srq(struct ocrdma_srq *srq, +int ocrdma_mbx_create_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq, struct ib_srq_init_attr *srq_attr, struct ocrdma_pd *pd) { @@ -2336,7 +2341,6 @@ int ocrdma_mbx_create_srq(struct ocrdma_srq *srq, struct ocrdma_create_srq_rsp *rsp; struct ocrdma_create_srq *cmd; dma_addr_t pa; - struct ocrdma_dev *dev = srq->dev; struct pci_dev *pdev = dev->nic_info.pdev; u32 max_rqe_allocated; @@ -2406,13 +2410,15 @@ int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) { int status = -ENOMEM; struct ocrdma_modify_srq *cmd; + struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device); + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd)); if (!cmd) return status; cmd->id = srq->id; cmd->limit_max_rqe |= srq_attr->srq_limit << OCRDMA_MODIFY_SRQ_LIMIT_SHIFT; - status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd); + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); kfree(cmd); return status; } @@ -2421,11 +2427,13 @@ int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) { int status = -ENOMEM; struct ocrdma_query_srq *cmd; + struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device); + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd)); if (!cmd) return status; cmd->id = srq->rq.dbid; - status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd); + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (status == 0) { struct ocrdma_query_srq_rsp *rsp = (struct ocrdma_query_srq_rsp *)cmd; @@ -2450,7 +2458,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) if (!cmd) return status; cmd->id = srq->id; - status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd); + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (srq->rq.va) dma_free_coherent(&pdev->dev, srq->rq.len, srq->rq.va, srq->rq.pa); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index be5db77404d..5d8e6f518f8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -112,8 +112,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *, int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *, struct ocrdma_qp_params *param); int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *); - -int ocrdma_mbx_create_srq(struct ocrdma_srq *, +int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *, struct ib_srq_init_attr *, struct ocrdma_pd *); int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 77fc50a5cc9..c9ace64fc34 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -229,7 +229,6 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); - ctx->dev = dev; INIT_LIST_HEAD(&ctx->mm_head); mutex_init(&ctx->mm_list_lock); @@ -274,7 +273,8 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) { struct ocrdma_mm *mm, *tmp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); - struct pci_dev *pdev = uctx->dev->nic_info.pdev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); + struct pci_dev *pdev = dev->nic_info.pdev; ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, @@ -291,7 +291,7 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context); - struct ocrdma_dev *dev = ucontext->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(context->device); unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; u64 unmapped_db = (u64) dev->nic_info.unmapped_db; unsigned long len = (vma->vm_end - vma->vm_start); @@ -432,11 +432,10 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) return status; } -static int ocrdma_alloc_lkey(struct ocrdma_mr *mr, u32 pdid, int acc, - u32 num_pbls, u32 addr_check) +static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, + u32 pdid, int acc, u32 num_pbls, u32 addr_check) { int status; - struct ocrdma_dev *dev = mr->hwmr.dev; mr->hwmr.fr_mr = 0; mr->hwmr.local_rd = 1; @@ -473,8 +472,7 @@ struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc) if (!mr) return ERR_PTR(-ENOMEM); - mr->hwmr.dev = dev; - status = ocrdma_alloc_lkey(mr, pd->id, acc, 0, + status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0, OCRDMA_ADDR_CHECK_DISABLE); if (status) { kfree(mr); @@ -503,7 +501,8 @@ static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev, } } -static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes) +static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr, + u32 num_pbes) { u32 num_pbls = 0; u32 idx = 0; @@ -519,7 +518,7 @@ static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes) num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64))); num_pbls = num_pbls / (pbl_size / sizeof(u64)); idx++; - } while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl); + } while (num_pbls >= dev->attr.max_num_mr_pbl); mr->hwmr.num_pbes = num_pbes; mr->hwmr.num_pbls = num_pbls; @@ -627,14 +626,13 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->hwmr.dev = dev; mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; } num_pbes = ib_umem_page_count(mr->umem); - status = ocrdma_get_pbl_info(mr, num_pbes); + status = ocrdma_get_pbl_info(dev, mr, num_pbes); if (status) goto umem_err; @@ -670,7 +668,7 @@ umem_err: int ocrdma_dereg_mr(struct ib_mr *ib_mr) { struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr); - struct ocrdma_dev *dev = mr->hwmr.dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device); int status; status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); @@ -685,7 +683,8 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) return status; } -static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata, +static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, + struct ib_udata *udata, struct ib_ucontext *ib_ctx) { int status; @@ -698,13 +697,13 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata, uresp.num_pages = 1; uresp.max_hw_cqe = cq->max_hw_cqe; uresp.page_addr[0] = cq->pa; - uresp.db_page_addr = cq->dev->nic_info.unmapped_db; - uresp.db_page_size = cq->dev->nic_info.db_page_size; + uresp.db_page_addr = dev->nic_info.unmapped_db; + uresp.db_page_size = dev->nic_info.db_page_size; uresp.phase_change = cq->phase_change ? 1 : 0; status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (status) { pr_err("%s(%d) copy error cqid=0x%x.\n", - __func__, cq->dev->id, cq->id); + __func__, dev->id, cq->id); goto err; } uctx = get_ocrdma_ucontext(ib_ctx); @@ -743,7 +742,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, spin_lock_init(&cq->comp_handler_lock); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); - cq->dev = dev; status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq); if (status) { @@ -751,7 +749,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, return ERR_PTR(status); } if (ib_ctx) { - status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx); + status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx); if (status) goto ctx_err; } @@ -785,7 +783,7 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) { int status; struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); - struct ocrdma_dev *dev = cq->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); status = ocrdma_mbx_destroy_cq(dev, cq); @@ -1457,7 +1455,8 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) return status; } -static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata) +static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, + struct ib_udata *udata) { int status; struct ocrdma_create_srq_uresp uresp; @@ -1467,11 +1466,11 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata) uresp.num_rq_pages = 1; uresp.rq_page_addr[0] = srq->rq.pa; uresp.rq_page_size = srq->rq.len; - uresp.db_page_addr = srq->dev->nic_info.unmapped_db + - (srq->pd->id * srq->dev->nic_info.db_page_size); - uresp.db_page_size = srq->dev->nic_info.db_page_size; + uresp.db_page_addr = dev->nic_info.unmapped_db + + (srq->pd->id * dev->nic_info.db_page_size); + uresp.db_page_size = dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; - if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET; uresp.db_shift = 24; } else { @@ -1508,10 +1507,9 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, return ERR_PTR(status); spin_lock_init(&srq->q_lock); - srq->dev = dev; srq->pd = pd; srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size); - status = ocrdma_mbx_create_srq(srq, init_attr, pd); + status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd); if (status) goto err; @@ -1538,7 +1536,7 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, } if (udata) { - status = ocrdma_copy_srq_uresp(srq, udata); + status = ocrdma_copy_srq_uresp(dev, srq, udata); if (status) goto arm_err; } @@ -1584,10 +1582,9 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) { int status; struct ocrdma_srq *srq; - struct ocrdma_dev *dev; + struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); srq = get_ocrdma_srq(ibsrq); - dev = srq->dev; status = ocrdma_mbx_destroy_srq(dev, srq); @@ -2354,7 +2351,7 @@ static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries, bool expand = false; int polled_hw_cqes = 0; struct ocrdma_qp *qp = NULL; - struct ocrdma_dev *dev = cq->dev; + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); struct ocrdma_cqe *cqe; u16 cur_getp; bool polled = false; bool stop = false; @@ -2435,14 +2432,11 @@ static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries, int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { int cqes_to_poll = num_entries; - struct ocrdma_cq *cq = NULL; - unsigned long flags; - struct ocrdma_dev *dev; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); int num_os_cqe = 0, err_cqes = 0; struct ocrdma_qp *qp; - - cq = get_ocrdma_cq(ibcq); - dev = cq->dev; + unsigned long flags; /* poll cqes from adapter CQ */ spin_lock_irqsave(&cq->cq_lock, flags); @@ -2473,16 +2467,14 @@ int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) { - struct ocrdma_cq *cq; - unsigned long flags; - struct ocrdma_dev *dev; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); u16 cq_id; u16 cur_getp; struct ocrdma_cqe *cqe; + unsigned long flags; - cq = get_ocrdma_cq(ibcq); cq_id = cq->id; - dev = cq->dev; spin_lock_irqsave(&cq->cq_lock, flags); if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED) -- cgit v1.2.3-70-g09d2 From 9c58726ba96ad5f767ce2d8c42159c3075a98d6f Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Wed, 7 Aug 2013 12:52:34 +0530 Subject: RDMA/ocrdma: Don't allow zero/invalid sgid usage Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6bbcc786ef6..af01ba202d4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2124,9 +2124,10 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, { int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; - union ib_gid sgid; + union ib_gid sgid, zgid; u32 vlan_id; u8 mac_addr[6]; + if ((ah_attr->ah_flags & IB_AH_GRH) == 0) return -EINVAL; cmd->params.tclass_sq_psn |= @@ -2142,6 +2143,11 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ah_attr->grh.sgid_index, &sgid); if (status) return status; + + memset(&zgid, 0, sizeof(zgid)); + if (!memcmp(&sgid, &zgid, sizeof(zgid))) + return -EINVAL; + qp->sgid_idx = ah_attr->grh.sgid_index; memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]); -- cgit v1.2.3-70-g09d2 From 057729cb234754d12e0b2a361c2fc85c6363cbf6 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Wed, 7 Aug 2013 12:52:35 +0530 Subject: RDMA/ocrdma: Remove driver QP state machine Remove QP state machine in ocrdma low-level driver and use on the core IB stack's instead. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 101 +++------------------------- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 +- 3 files changed, 11 insertions(+), 96 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index af01ba202d4..c4bb29c9d48 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -654,7 +654,7 @@ static void ocrdma_process_qpcat_error(struct ocrdma_dev *dev, if (qp == NULL) BUG(); - ocrdma_qp_state_machine(qp, new_ib_qps, &old_ib_qps); + ocrdma_qp_state_change(qp, new_ib_qps, &old_ib_qps); } static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, @@ -1676,8 +1676,8 @@ void ocrdma_flush_qp(struct ocrdma_qp *qp) spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags); } -int ocrdma_qp_state_machine(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, - enum ib_qp_state *old_ib_state) +int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, + enum ib_qp_state *old_ib_state) { unsigned long flags; int status = 0; @@ -1694,96 +1694,11 @@ int ocrdma_qp_state_machine(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, return 1; } - switch (qp->state) { - case OCRDMA_QPS_RST: - switch (new_state) { - case OCRDMA_QPS_RST: - case OCRDMA_QPS_INIT: - break; - default: - status = -EINVAL; - break; - }; - break; - case OCRDMA_QPS_INIT: - /* qps: INIT->XXX */ - switch (new_state) { - case OCRDMA_QPS_INIT: - case OCRDMA_QPS_RTR: - break; - case OCRDMA_QPS_ERR: - ocrdma_flush_qp(qp); - break; - default: - status = -EINVAL; - break; - }; - break; - case OCRDMA_QPS_RTR: - /* qps: RTS->XXX */ - switch (new_state) { - case OCRDMA_QPS_RTS: - break; - case OCRDMA_QPS_ERR: - ocrdma_flush_qp(qp); - break; - default: - status = -EINVAL; - break; - }; - break; - case OCRDMA_QPS_RTS: - /* qps: RTS->XXX */ - switch (new_state) { - case OCRDMA_QPS_SQD: - case OCRDMA_QPS_SQE: - break; - case OCRDMA_QPS_ERR: - ocrdma_flush_qp(qp); - break; - default: - status = -EINVAL; - break; - }; - break; - case OCRDMA_QPS_SQD: - /* qps: SQD->XXX */ - switch (new_state) { - case OCRDMA_QPS_RTS: - case OCRDMA_QPS_SQE: - case OCRDMA_QPS_ERR: - break; - default: - status = -EINVAL; - break; - }; - break; - case OCRDMA_QPS_SQE: - switch (new_state) { - case OCRDMA_QPS_RTS: - case OCRDMA_QPS_ERR: - break; - default: - status = -EINVAL; - break; - }; - break; - case OCRDMA_QPS_ERR: - /* qps: ERR->XXX */ - switch (new_state) { - case OCRDMA_QPS_RST: - break; - default: - status = -EINVAL; - break; - }; - break; - default: - status = -EINVAL; - break; - }; - if (!status) - qp->state = new_state; + + if (new_state == OCRDMA_QPS_ERR) + ocrdma_flush_qp(qp); + + qp->state = new_state; spin_unlock_irqrestore(&qp->q_lock, flags); return status; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 5d8e6f518f8..cc90ac3b6d4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -122,7 +122,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *); int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *); int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *); -int ocrdma_qp_state_machine(struct ocrdma_qp *, enum ib_qp_state new_state, +int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state, enum ib_qp_state *old_ib_state); bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c9ace64fc34..2cfbd343ba0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1101,7 +1101,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp = get_ocrdma_qp(ibqp); dev = qp->dev; if (attr_mask & IB_QP_STATE) - status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps); + status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps); /* if new and previous states are same hw doesn't need to * know about it. */ @@ -2106,7 +2106,7 @@ static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe, ibwc->status = ocrdma_to_ibwc_err(status); ocrdma_flush_qp(qp); - ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL); + ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL); /* if wqe/rqe pending for which cqe needs to be returned, * trigger inflating it. -- cgit v1.2.3-70-g09d2 From 7b9b1a596e0f37b463dfe3bc36b6d035c7450ca0 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Wed, 7 Aug 2013 12:52:36 +0530 Subject: RDMA/ocrdma: Remove __packed 1) Remove __packed for structures. 2) Align and pad all ABI structure to 64 bit boundaries instead of using __packed. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 31 +++---- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 133 +++++++++++++++--------------- 2 files changed, 84 insertions(+), 80 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index 517ab20b727..e5ea9a9776a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -28,6 +28,8 @@ #ifndef __OCRDMA_ABI_H__ #define __OCRDMA_ABI_H__ +/* user kernel communication data structures. */ + struct ocrdma_alloc_ucontext_resp { u32 dev_id; u32 wqe_size; @@ -35,16 +37,16 @@ struct ocrdma_alloc_ucontext_resp { u32 dpp_wqe_size; u64 ah_tbl_page; u32 ah_tbl_len; - u32 rsvd; - u8 fw_ver[32]; u32 rqe_size; + u8 fw_ver[32]; + /* for future use/new features in progress */ u64 rsvd1; -} __packed; + u64 rsvd2; +}; -/* user kernel communication data structures. */ struct ocrdma_alloc_pd_ureq { u64 rsvd1; -} __packed; +}; struct ocrdma_alloc_pd_uresp { u32 id; @@ -52,12 +54,12 @@ struct ocrdma_alloc_pd_uresp { u32 dpp_page_addr_hi; u32 dpp_page_addr_lo; u64 rsvd1; -} __packed; +}; struct ocrdma_create_cq_ureq { u32 dpp_cq; - u32 rsvd; -} __packed; + u32 rsvd; /* pad */ +}; #define MAX_CQ_PAGES 8 struct ocrdma_create_cq_uresp { @@ -69,9 +71,10 @@ struct ocrdma_create_cq_uresp { u64 db_page_addr; u32 db_page_size; u32 phase_change; + /* for future use/new features in progress */ u64 rsvd1; u64 rsvd2; -} __packed; +}; #define MAX_QP_PAGES 8 #define MAX_UD_AV_PAGES 8 @@ -80,14 +83,14 @@ struct ocrdma_create_qp_ureq { u8 enable_dpp_cq; u8 rsvd; u16 dpp_cq_id; - u32 rsvd1; + u32 rsvd1; /* pad */ }; struct ocrdma_create_qp_uresp { u16 qp_id; u16 sq_dbid; u16 rq_dbid; - u16 resv0; + u16 resv0; /* pad */ u32 sq_page_size; u32 rq_page_size; u32 num_sq_pages; @@ -98,19 +101,19 @@ struct ocrdma_create_qp_uresp { u32 db_page_size; u32 dpp_credit; u32 dpp_offset; - u32 rsvd1; u32 num_wqe_allocated; u32 num_rqe_allocated; u32 db_sq_offset; u32 db_rq_offset; u32 db_shift; + u64 rsvd1; u64 rsvd2; u64 rsvd3; } __packed; struct ocrdma_create_srq_uresp { u16 rq_dbid; - u16 resv0; + u16 resv0; /* pad */ u32 resv1; u32 rq_page_size; @@ -126,6 +129,6 @@ struct ocrdma_create_srq_uresp { u64 rsvd2; u64 rsvd3; -} __packed; +}; #endif /* __OCRDMA_ABI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 36b062da2ae..96a96298241 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -177,7 +177,7 @@ struct ocrdma_mbx_hdr { u32 timeout; /* in seconds */ u32 cmd_len; u32 rsvd_version; -} __packed; +}; enum { OCRDMA_MBX_RSP_OPCODE_SHIFT = 0, @@ -197,7 +197,7 @@ struct ocrdma_mbx_rsp { u32 status; u32 rsp_len; u32 add_rsp_len; -} __packed; +}; enum { OCRDMA_MQE_EMBEDDED = 1, @@ -208,7 +208,7 @@ struct ocrdma_mqe_sge { u32 pa_lo; u32 pa_hi; u32 len; -} __packed; +}; enum { OCRDMA_MQE_HDR_EMB_SHIFT = 0, @@ -225,12 +225,12 @@ struct ocrdma_mqe_hdr { u32 tag_lo; u32 tag_hi; u32 rsvd3; -} __packed; +}; struct ocrdma_mqe_emb_cmd { struct ocrdma_mbx_hdr mch; u8 pyld[220]; -} __packed; +}; struct ocrdma_mqe { struct ocrdma_mqe_hdr hdr; @@ -242,7 +242,7 @@ struct ocrdma_mqe { u8 cmd[236]; struct ocrdma_mbx_rsp rsp; } u; -} __packed; +}; #define OCRDMA_EQ_LEN 4096 #define OCRDMA_MQ_CQ_LEN 256 @@ -259,12 +259,12 @@ struct ocrdma_mqe { struct ocrdma_delete_q_req { struct ocrdma_mbx_hdr req; u32 id; -} __packed; +}; struct ocrdma_pa { u32 lo; u32 hi; -} __packed; +}; #define MAX_OCRDMA_EQ_PAGES (8) struct ocrdma_create_eq_req { @@ -275,7 +275,7 @@ struct ocrdma_create_eq_req { u32 delay; u32 rsvd; struct ocrdma_pa pa[MAX_OCRDMA_EQ_PAGES]; -} __packed; +}; enum { OCRDMA_CREATE_EQ_VALID = Bit(29), @@ -310,7 +310,7 @@ struct ocrdma_mcqe { u32 tag_lo; u32 tag_hi; u32 valid_ae_cmpl_cons; -} __packed; +}; enum { OCRDMA_AE_MCQE_QPVALID = Bit(31), @@ -332,7 +332,7 @@ struct ocrdma_ae_mcqe { u32 cqvalid_cqid; u32 evt_tag; u32 valid_ae_event; -} __packed; +}; enum { OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT = 16, @@ -356,7 +356,7 @@ struct ocrdma_ae_mpa_mcqe { u32 w1; u32 w2; u32 valid_ae_event; -} __packed; +}; enum { OCRDMA_AE_QP_MCQE_NEW_QP_STATE_SHIFT = 0, @@ -382,7 +382,7 @@ struct ocrdma_ae_qp_mcqe { u32 w1; u32 w2; u32 valid_ae_event; -} __packed; +}; #define OCRDMA_ASYNC_EVE_CODE 0x14 @@ -487,7 +487,8 @@ struct ocrdma_mbx_query_config { u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; - u64 max_mr_size; + u32 max_mr_size_lo; + u32 max_mr_size_hi; u32 max_num_mr_pbl; u32 max_mw; u32 max_fmr; @@ -502,14 +503,14 @@ struct ocrdma_mbx_query_config { u32 max_wqes_rqes_per_q; u32 max_cq_cqes_per_cq; u32 max_srq_rqe_sge; -} __packed; +}; struct ocrdma_fw_ver_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; u8 running_ver[32]; -} __packed; +}; struct ocrdma_fw_conf_rsp { struct ocrdma_mqe_hdr hdr; @@ -535,7 +536,7 @@ struct ocrdma_fw_conf_rsp { u32 base_eqid; u32 max_eq; -} __packed; +}; enum { OCRDMA_FN_MODE_RDMA = 0x4 @@ -584,7 +585,7 @@ struct ocrdma_create_cq_cmd { struct ocrdma_create_cq { struct ocrdma_mqe_hdr hdr; struct ocrdma_create_cq_cmd cmd; -} __packed; +}; enum { OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF @@ -593,12 +594,12 @@ enum { struct ocrdma_create_cq_cmd_rsp { struct ocrdma_mbx_rsp rsp; u32 cq_id; -} __packed; +}; struct ocrdma_create_cq_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_create_cq_cmd_rsp rsp; -} __packed; +}; enum { OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22, @@ -617,12 +618,12 @@ struct ocrdma_create_mq_req { u32 async_cqid_valid; u32 rsvd; struct ocrdma_pa pa[8]; -} __packed; +}; struct ocrdma_create_mq_rsp { struct ocrdma_mbx_rsp rsp; u32 id; -} __packed; +}; enum { OCRDMA_DESTROY_CQ_QID_SHIFT = 0, @@ -637,12 +638,12 @@ struct ocrdma_destroy_cq { struct ocrdma_mbx_hdr req; u32 bypass_flush_qid; -} __packed; +}; struct ocrdma_destroy_cq_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; -} __packed; +}; enum { OCRDMA_QPT_GSI = 1, @@ -766,7 +767,7 @@ struct ocrdma_create_qp_req { u32 dpp_credits_cqid; u32 rpir_lkey; struct ocrdma_pa ird_addr[MAX_OCRDMA_IRD_PAGES]; -} __packed; +}; enum { OCRDMA_CREATE_QP_RSP_QP_ID_SHIFT = 0, @@ -820,18 +821,18 @@ struct ocrdma_create_qp_rsp { u32 max_ord_ird; u32 sq_rq_id; u32 dpp_response; -} __packed; +}; struct ocrdma_destroy_qp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr req; u32 qp_id; -} __packed; +}; struct ocrdma_destroy_qp_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; -} __packed; +}; enum { OCRDMA_MODIFY_QP_ID_SHIFT = 0, @@ -975,7 +976,7 @@ struct ocrdma_qp_params { u32 dmac_b0_to_b3; u32 vlan_dmac_b4_to_b5; u32 qkey; -} __packed; +}; struct ocrdma_modify_qp { @@ -986,7 +987,7 @@ struct ocrdma_modify_qp { u32 flags; u32 rdma_flags; u32 num_outstanding_atomic_rd; -} __packed; +}; enum { OCRDMA_MODIFY_QP_RSP_MAX_RQE_SHIFT = 0, @@ -1007,7 +1008,7 @@ struct ocrdma_modify_qp_rsp { u32 max_wqe_rqe; u32 max_ord_ird; -} __packed; +}; struct ocrdma_query_qp { struct ocrdma_mqe_hdr hdr; @@ -1016,13 +1017,13 @@ struct ocrdma_query_qp { #define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 #define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF u32 qp_id; -} __packed; +}; struct ocrdma_query_qp_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; struct ocrdma_qp_params params; -} __packed; +}; enum { OCRDMA_CREATE_SRQ_PD_ID_SHIFT = 0, @@ -1051,7 +1052,7 @@ struct ocrdma_create_srq { u32 max_sge_rqe; u32 pages_rqe_sz; struct ocrdma_pa rq_addr[MAX_OCRDMA_SRQ_PAGES]; -} __packed; +}; enum { OCRDMA_CREATE_SRQ_RSP_SRQ_ID_SHIFT = 0, @@ -1070,7 +1071,7 @@ struct ocrdma_create_srq_rsp { u32 id; u32 max_sge_rqe_allocated; -} __packed; +}; enum { OCRDMA_MODIFY_SRQ_ID_SHIFT = 0, @@ -1089,7 +1090,7 @@ struct ocrdma_modify_srq { u32 id; u32 limit_max_rqe; -} __packed; +}; enum { OCRDMA_QUERY_SRQ_ID_SHIFT = 0, @@ -1101,7 +1102,7 @@ struct ocrdma_query_srq { struct ocrdma_mbx_rsp req; u32 id; -} __packed; +}; enum { OCRDMA_QUERY_SRQ_RSP_PD_ID_SHIFT = 0, @@ -1123,7 +1124,7 @@ struct ocrdma_query_srq_rsp { u32 max_rqe_pdid; u32 srq_lmt_max_sge; -} __packed; +}; enum { OCRDMA_DESTROY_SRQ_ID_SHIFT = 0, @@ -1135,7 +1136,7 @@ struct ocrdma_destroy_srq { struct ocrdma_mbx_rsp req; u32 id; -} __packed; +}; enum { OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16), @@ -1147,7 +1148,7 @@ struct ocrdma_alloc_pd { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr req; u32 enable_dpp_rsvd; -} __packed; +}; enum { OCRDMA_ALLOC_PD_RSP_DPP = Bit(16), @@ -1159,18 +1160,18 @@ struct ocrdma_alloc_pd_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; u32 dpp_page_pdid; -} __packed; +}; struct ocrdma_dealloc_pd { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr req; u32 id; -} __packed; +}; struct ocrdma_dealloc_pd_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; -} __packed; +}; enum { OCRDMA_ADDR_CHECK_ENABLE = 1, @@ -1206,7 +1207,7 @@ struct ocrdma_alloc_lkey { u32 pdid; u32 pbl_sz_flags; -} __packed; +}; struct ocrdma_alloc_lkey_rsp { struct ocrdma_mqe_hdr hdr; @@ -1214,7 +1215,7 @@ struct ocrdma_alloc_lkey_rsp { u32 lrkey; u32 num_pbl_rsvd; -} __packed; +}; struct ocrdma_dealloc_lkey { struct ocrdma_mqe_hdr hdr; @@ -1222,12 +1223,12 @@ struct ocrdma_dealloc_lkey { u32 lkey; u32 rsvd_frmr; -} __packed; +}; struct ocrdma_dealloc_lkey_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; -} __packed; +}; #define MAX_OCRDMA_NSMR_PBL (u32)22 #define MAX_OCRDMA_PBL_SIZE 65536 @@ -1283,7 +1284,7 @@ struct ocrdma_reg_nsmr { u32 va_loaddr; u32 va_hiaddr; struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL]; -} __packed; +}; enum { OCRDMA_REG_NSMR_CONT_PBL_SHIFT = 0, @@ -1310,7 +1311,7 @@ struct ocrdma_reg_nsmr_cont { struct ocrdma_pbe { u32 pa_hi; u32 pa_lo; -} __packed; +}; enum { OCRDMA_REG_NSMR_RSP_NUM_PBL_SHIFT = 16, @@ -1322,7 +1323,7 @@ struct ocrdma_reg_nsmr_rsp { u32 lrkey; u32 num_pbl; -} __packed; +}; enum { OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_SHIFT = 0, @@ -1342,7 +1343,7 @@ struct ocrdma_reg_nsmr_cont_rsp { u32 lrkey_key_index; u32 num_pbl; -} __packed; +}; enum { OCRDMA_ALLOC_MW_PD_ID_SHIFT = 0, @@ -1354,7 +1355,7 @@ struct ocrdma_alloc_mw { struct ocrdma_mbx_hdr req; u32 pdid; -} __packed; +}; enum { OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_SHIFT = 0, @@ -1366,7 +1367,7 @@ struct ocrdma_alloc_mw_rsp { struct ocrdma_mbx_rsp rsp; u32 lrkey_index; -} __packed; +}; struct ocrdma_attach_mcast { struct ocrdma_mqe_hdr hdr; @@ -1375,12 +1376,12 @@ struct ocrdma_attach_mcast { u8 mgid[16]; u32 mac_b0_to_b3; u32 vlan_mac_b4_to_b5; -} __packed; +}; struct ocrdma_attach_mcast_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; -} __packed; +}; struct ocrdma_detach_mcast { struct ocrdma_mqe_hdr hdr; @@ -1389,12 +1390,12 @@ struct ocrdma_detach_mcast { u8 mgid[16]; u32 mac_b0_to_b3; u32 vlan_mac_b4_to_b5; -} __packed; +}; struct ocrdma_detach_mcast_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; -} __packed; +}; enum { OCRDMA_CREATE_AH_NUM_PAGES_SHIFT = 19, @@ -1418,24 +1419,24 @@ struct ocrdma_create_ah_tbl { u32 ah_conf; struct ocrdma_pa tbl_addr[8]; -} __packed; +}; struct ocrdma_create_ah_tbl_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; u32 ahid; -} __packed; +}; struct ocrdma_delete_ah_tbl { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr req; u32 ahid; -} __packed; +}; struct ocrdma_delete_ah_tbl_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; -} __packed; +}; enum { OCRDMA_EQE_VALID_SHIFT = 0, @@ -1448,7 +1449,7 @@ enum { struct ocrdma_eqe { u32 id_valid; -} __packed; +}; enum OCRDMA_CQE_STATUS { OCRDMA_CQE_SUCCESS = 0, @@ -1532,14 +1533,14 @@ struct ocrdma_cqe { } cmn; }; u32 flags_status_srcqpn; /* w3 */ -} __packed; +}; struct ocrdma_sge { u32 addr_hi; u32 addr_lo; u32 lrkey; u32 len; -} __packed; +}; enum { OCRDMA_FLAG_SIG = 0x1, @@ -1600,14 +1601,14 @@ struct ocrdma_hdr_wqe { u32 lkey; }; u32 total_len; -} __packed; +}; struct ocrdma_ewqe_ud_hdr { u32 rsvd_dest_qpn; u32 qkey; u32 rsvd_ahid; u32 rsvd; -} __packed; +}; struct ocrdma_eth_basic { u8 dmac[6]; -- cgit v1.2.3-70-g09d2 From 45e86b33ec8b33f9ed41d9f9005f9e663018f8f1 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Wed, 7 Aug 2013 12:52:37 +0530 Subject: RDMA/ocrdma: Cache recv DB until QP moved to RTR 1) In post recv, don't ring the DB doorbell if the QP is in RTR state. Cache the DB calls, until the QP is moved to RTS state. 2) Add max_rd_sge support to dev->attr. 3) Code cleanup in alloc_pd path. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 ++ drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 +++ drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 29 +++++++++++++++++++++++------ 4 files changed, 29 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 8d54dc74ca7..e798837f1fe 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -60,6 +60,7 @@ struct ocrdma_dev_attr { int max_send_sge; int max_recv_sge; int max_srq_sge; + int max_rdma_sge; int max_mr; u64 max_mr_size; u32 max_num_mr_pbl; @@ -287,6 +288,7 @@ struct ocrdma_qp { u32 qkey; bool dpp_enabled; u8 *ird_q_va; + u16 db_cache; }; struct ocrdma_hw_mr { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index c4bb29c9d48..97bb1ce8d24 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -992,6 +992,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_srq_sge = (rsp->max_srq_rqe_sge & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET; + attr->max_rdma_sge = (rsp->max_write_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT; attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 96a96298241..0184009060d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1306,7 +1306,7 @@ struct ocrdma_reg_nsmr_cont { u32 last; struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL]; -} __packed; +}; struct ocrdma_pbe { u32 pa_hi; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2cfbd343ba0..5f68dff0d6c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -84,7 +84,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY; attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge); - attr->max_sge_rd = 0; + attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; @@ -327,7 +327,7 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return status; } -static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd, +static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, struct ib_ucontext *ib_ctx, struct ib_udata *udata) { @@ -337,7 +337,6 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd, u32 db_page_size; struct ocrdma_alloc_pd_uresp rsp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); - struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); memset(&rsp, 0, sizeof(rsp)); rsp.id = pd->id; @@ -400,14 +399,15 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, } if (udata && context) { - status = ocrdma_copy_pd_uresp(pd, context, udata); + status = ocrdma_copy_pd_uresp(dev, pd, context, udata); if (status) goto err; } return &pd->ibpd; err: - ocrdma_dealloc_pd(&pd->ibpd); + status = ocrdma_mbx_dealloc_pd(dev, pd); + kfree(pd); return ERR_PTR(status); } @@ -1090,6 +1090,17 @@ gen_err: return ERR_PTR(status); } + +static void ocrdma_flush_rq_db(struct ocrdma_qp *qp) +{ + if (qp->db_cache) { + u32 val = qp->rq.dbid | (qp->db_cache << + ocrdma_get_num_posted_shift(qp)); + iowrite32(val, qp->rq_db); + qp->db_cache = 0; + } +} + int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) { @@ -1108,6 +1119,9 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (status < 0) return status; status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps); + if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR) + ocrdma_flush_rq_db(qp); + return status; } @@ -1822,7 +1836,10 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) { u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp)); - iowrite32(val, qp->rq_db); + if (qp->state != OCRDMA_QPS_INIT) + iowrite32(val, qp->rq_db); + else + qp->db_cache++; } static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, -- cgit v1.2.3-70-g09d2 From 4668e4b527d2263a74a85334243bb740905da51e Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Fri, 19 Jul 2013 13:57:21 -0400 Subject: IB/qib: Improve SDMA performance 1. The code accepts chunks of messages, and splits the chunk into packets when converting packets into sdma queue entries. Adjacent packets will use user buffer pages smartly to avoid pinning the same page multiple times. 2. Instead of discarding all the work when SDMA queue is full, the work is saved in a pending queue. Whenever there are enough SDMA queue free entries, pending queue is directly put onto SDMA queue. 3. An interrupt handler is used to progress this pending queue. Reviewed-by: Mike Marciniszyn Signed-off-by: CQ Tang Signed-off-by: Mike Marciniszyn [ Fixed up sparse warnings. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 4 + drivers/infiniband/hw/qib/qib_common.h | 32 +- drivers/infiniband/hw/qib/qib_file_ops.c | 2 +- drivers/infiniband/hw/qib/qib_sdma.c | 8 +- drivers/infiniband/hw/qib/qib_user_sdma.c | 909 ++++++++++++++++++++++-------- 5 files changed, 728 insertions(+), 227 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 4a9af795b88..ae3e4feca71 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -576,11 +576,13 @@ struct qib_pportdata { /* read/write using lock */ spinlock_t sdma_lock ____cacheline_aligned_in_smp; struct list_head sdma_activelist; + struct list_head sdma_userpending; u64 sdma_descq_added; u64 sdma_descq_removed; u16 sdma_descq_tail; u16 sdma_descq_head; u8 sdma_generation; + u8 sdma_intrequest; struct tasklet_struct sdma_sw_clean_up_task ____cacheline_aligned_in_smp; @@ -1326,6 +1328,8 @@ int qib_setup_sdma(struct qib_pportdata *); void qib_teardown_sdma(struct qib_pportdata *); void __qib_sdma_intr(struct qib_pportdata *); void qib_sdma_intr(struct qib_pportdata *); +void qib_user_sdma_send_desc(struct qib_pportdata *dd, + struct list_head *pktlist); int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, u32, struct qib_verbs_txreq *); /* ppd->sdma_lock should be locked before calling this. */ diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index 4f255b723ff..5670ace27c6 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -279,7 +279,7 @@ struct qib_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define QIB_USER_SWMINOR 12 +#define QIB_USER_SWMINOR 13 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) @@ -701,7 +701,37 @@ struct qib_message_header { __be32 bth[3]; /* fields below this point are in host byte order */ struct qib_header iph; + /* fields below are simplified, but should match PSM */ + /* some are accessed by driver when packet spliting is needed */ __u8 sub_opcode; + __u8 flags; + __u16 commidx; + __u32 ack_seq_num; + __u8 flowid; + __u8 hdr_dlen; + __u16 mqhdr; + __u32 uwords[4]; +}; + +/* sequence number bits for message */ +union qib_seqnum { + struct { + __u32 seq:11; + __u32 gen:8; + __u32 flow:5; + }; + struct { + __u32 pkt:16; + __u32 msg:8; + }; + __u32 val; +}; + +/* qib receiving-dma tid-session-member */ +struct qib_tid_session_member { + __u16 tid; + __u16 offset; + __u16 length; }; /* IB - LRH header consts */ diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index b51a51486cb..275f247f9fc 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1220,7 +1220,7 @@ static int qib_compatible_subctxts(int user_swmajor, int user_swminor) return user_swminor == 3; default: /* >= 4 are compatible (or are expected to be) */ - return user_swminor >= 4; + return user_swminor <= QIB_USER_SWMINOR; } } /* make no promises yet for future major versions */ diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 9b5322d8cd5..c6d6a54d2e1 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -423,8 +423,11 @@ void qib_sdma_intr(struct qib_pportdata *ppd) void __qib_sdma_intr(struct qib_pportdata *ppd) { - if (__qib_sdma_running(ppd)) + if (__qib_sdma_running(ppd)) { qib_sdma_make_progress(ppd); + if (!list_empty(&ppd->sdma_userpending)) + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + } } int qib_setup_sdma(struct qib_pportdata *ppd) @@ -452,6 +455,9 @@ int qib_setup_sdma(struct qib_pportdata *ppd) ppd->sdma_descq_removed = 0; ppd->sdma_descq_added = 0; + ppd->sdma_intrequest = 0; + INIT_LIST_HEAD(&ppd->sdma_userpending); + INIT_LIST_HEAD(&ppd->sdma_activelist); tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 82442085cbe..d0a0ea0c14d 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -53,20 +53,36 @@ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 struct qib_user_sdma_pkt { - u8 naddr; /* dimension of addr (1..3) ... */ + struct list_head list; /* list element */ + + u8 tiddma; /* if this is NEW tid-sdma */ + u8 largepkt; /* this is large pkt from kmalloc */ + u16 frag_size; /* frag size used by PSM */ + u16 index; /* last header index or push index */ + u16 naddr; /* dimension of addr (1..3) ... */ + u16 addrlimit; /* addr array size */ + u16 tidsmidx; /* current tidsm index */ + u16 tidsmcount; /* tidsm array item count */ + u16 payload_size; /* payload size so far for header */ + u32 bytes_togo; /* bytes for processing */ u32 counter; /* sdma pkts queued counter for this entry */ + struct qib_tid_session_member *tidsm; /* tid session member array */ + struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ u64 added; /* global descq number of entries */ struct { - u32 offset; /* offset for kvaddr, addr */ - u32 length; /* length in page */ - u8 put_page; /* should we put_page? */ - u8 dma_mapped; /* is page dma_mapped? */ + u16 offset; /* offset for kvaddr, addr */ + u16 length; /* length in page */ + u16 first_desc; /* first desc */ + u16 last_desc; /* last desc */ + u16 put_page; /* should we put_page? */ + u16 dma_mapped; /* is page dma_mapped? */ + u16 dma_length; /* for dma_unmap_page() */ + u16 padding; struct page *page; /* may be NULL (coherent mem) */ void *kvaddr; /* FIXME: only for pio hack */ dma_addr_t addr; } addr[4]; /* max pages, any more and we coalesce */ - struct list_head list; /* list element */ }; struct qib_user_sdma_queue { @@ -77,6 +93,12 @@ struct qib_user_sdma_queue { */ struct list_head sent; + /* + * Because above list will be accessed by both process and + * signal handler, we need a spinlock for it. + */ + spinlock_t sent_lock ____cacheline_aligned_in_smp; + /* headers with expected length are allocated from here... */ char header_cache_name[64]; struct dma_pool *header_cache; @@ -88,6 +110,12 @@ struct qib_user_sdma_queue { /* as packets go on the queued queue, they are counted... */ u32 counter; u32 sent_counter; + /* pending packets, not sending yet */ + u32 num_pending; + /* sending packets, not complete yet */ + u32 num_sending; + /* global descq number of entry of last sending packet */ + u64 added; /* dma page table */ struct rb_root dma_pages_root; @@ -107,8 +135,12 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->counter = 0; pq->sent_counter = 0; - INIT_LIST_HEAD(&pq->sent); + pq->num_pending = 0; + pq->num_sending = 0; + pq->added = 0; + INIT_LIST_HEAD(&pq->sent); + spin_lock_init(&pq->sent_lock); mutex_init(&pq->lock); snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), @@ -144,34 +176,310 @@ done: } static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, - int i, size_t offset, size_t len, - int put_page, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) + int i, u16 offset, u16 len, + u16 first_desc, u16 last_desc, + u16 put_page, u16 dma_mapped, + struct page *page, void *kvaddr, + dma_addr_t dma_addr, u16 dma_length) { pkt->addr[i].offset = offset; pkt->addr[i].length = len; + pkt->addr[i].first_desc = first_desc; + pkt->addr[i].last_desc = last_desc; pkt->addr[i].put_page = put_page; pkt->addr[i].dma_mapped = dma_mapped; pkt->addr[i].page = page; pkt->addr[i].kvaddr = kvaddr; pkt->addr[i].addr = dma_addr; + pkt->addr[i].dma_length = dma_length; } -static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt, - u32 counter, size_t offset, - size_t len, int dma_mapped, - struct page *page, - void *kvaddr, dma_addr_t dma_addr) +static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + size_t len, dma_addr_t *dma_addr) { - pkt->naddr = 1; - pkt->counter = counter; - qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page, - kvaddr, dma_addr); + void *hdr; + + if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) + hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL, + dma_addr); + else + hdr = NULL; + + if (!hdr) { + hdr = kmalloc(len, GFP_KERNEL); + if (!hdr) + return NULL; + + *dma_addr = 0; + } + + return hdr; +} + +static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + struct page *page, u16 put, + u16 offset, u16 len, void *kvaddr) +{ + __le16 *pbc16; + void *pbcvaddr; + struct qib_message_header *hdr; + u16 newlen, pbclen, lastdesc, dma_mapped; + u32 vcto; + union qib_seqnum seqnum; + dma_addr_t pbcdaddr; + dma_addr_t dma_addr = + dma_map_page(&dd->pcidev->dev, + page, offset, len, DMA_TO_DEVICE); + int ret = 0; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + /* + * dma mapping error, pkt has not managed + * this page yet, return the page here so + * the caller can ignore this page. + */ + if (put) { + put_page(page); + } else { + /* coalesce case */ + kunmap(page); + __free_page(page); + } + ret = -ENOMEM; + goto done; + } + offset = 0; + dma_mapped = 1; + + +next_fragment: + + /* + * In tid-sdma, the transfer length is restricted by + * receiver side current tid page length. + */ + if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length) + newlen = pkt->tidsm[pkt->tidsmidx].length; + else + newlen = len; + + /* + * Then the transfer length is restricted by MTU. + * the last descriptor flag is determined by: + * 1. the current packet is at frag size length. + * 2. the current tid page is done if tid-sdma. + * 3. there is no more byte togo if sdma. + */ + lastdesc = 0; + if ((pkt->payload_size + newlen) >= pkt->frag_size) { + newlen = pkt->frag_size - pkt->payload_size; + lastdesc = 1; + } else if (pkt->tiddma) { + if (newlen == pkt->tidsm[pkt->tidsmidx].length) + lastdesc = 1; + } else { + if (newlen == pkt->bytes_togo) + lastdesc = 1; + } + + /* fill the next fragment in this page */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + offset, newlen, /* offset, len */ + 0, lastdesc, /* first last desc */ + put, dma_mapped, /* put page, dma mapped */ + page, kvaddr, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->bytes_togo -= newlen; + pkt->payload_size += newlen; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* If there is no more byte togo. (lastdesc==1) */ + if (pkt->bytes_togo == 0) { + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + goto done; + } + + /* If tid-sdma, advance tid info. */ + if (pkt->tiddma) { + pkt->tidsm[pkt->tidsmidx].length -= newlen; + if (pkt->tidsm[pkt->tidsmidx].length) { + pkt->tidsm[pkt->tidsmidx].offset += newlen; + } else { + pkt->tidsmidx++; + if (pkt->tidsmidx == pkt->tidsmcount) { + ret = -EFAULT; + goto done; + } + } + } + + /* + * If this is NOT the last descriptor. (newlen==len) + * the current packet is not done yet, but the current + * send side page is done. + */ + if (lastdesc == 0) + goto done; + + /* + * If running this driver under PSM with message size + * fitting into one transfer unit, it is not possible + * to pass this line. otherwise, it is a buggggg. + */ + + /* + * Since the current packet is done, and there are more + * bytes togo, we need to create a new sdma header, copying + * from previous sdma header and modify both. + */ + pbclen = pkt->addr[pkt->index].length; + pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr); + if (!pbcvaddr) { + ret = -ENOMEM; + goto done; + } + /* Copy the previous sdma header to new sdma header */ + pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr; + memcpy(pbcvaddr, pbc16, pbclen); + + /* Modify the previous sdma header */ + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* turn on the header suppression */ + hdr->iph.pkt_flags = + cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2); + /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */ + hdr->flags &= ~(0x04|0x20); + } else { + /* turn off extra bytes: 20-21 bits */ + hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF); + /* turn off ACK_REQ: 0x04 */ + hdr->flags &= ~(0x04); + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* The packet is done, header is not dma mapped yet. + * it should be from kmalloc */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + /* Modify the new sdma header */ + pbc16 = (__le16 *)pbcvaddr; + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* Set new tid and offset for new sdma header */ + hdr->iph.ver_ctxt_tid_offset = cpu_to_le32( + (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) + + (pkt->tidsm[pkt->tidsmidx].tid<tidsm[pkt->tidsmidx].offset>>2)); + } else { + /* Middle protocol new packet offset */ + hdr->uwords[2] += pkt->payload_size; + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* Next sequence number in new sdma header */ + seqnum.val = be32_to_cpu(hdr->bth[2]); + if (pkt->tiddma) + seqnum.seq++; + else + seqnum.pkt++; + hdr->bth[2] = cpu_to_be32(seqnum.val); + + /* Init new sdma header. */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + 0, pbclen, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbcvaddr, /* struct page, virt addr */ + pbcdaddr, pbclen); /* dma addr, dma length */ + pkt->index = pkt->naddr; + pkt->payload_size = 0; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* Prepare for next fragment in this page */ + if (newlen != len) { + if (dma_mapped) { + put = 0; + dma_mapped = 0; + page = NULL; + kvaddr = NULL; + } + len -= newlen; + offset += newlen; + + goto next_fragment; + } + +done: + return ret; } /* we've too many pages in the iovec, coalesce to a single page */ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, const struct iovec *iov, unsigned long niov) @@ -182,7 +490,6 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, char *mpage; int i; int len = 0; - dma_addr_t dma_addr; if (!page) { ret = -ENOMEM; @@ -205,17 +512,8 @@ static int qib_user_sdma_coalesce(const struct qib_devdata *dd, len += iov[i].iov_len; } - dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { - ret = -ENOMEM; - goto free_unmap; - } - - qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save, - dma_addr); - pkt->naddr = 2; - + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + page, 0, 0, len, mpage_save); goto done; free_unmap: @@ -238,16 +536,6 @@ static int qib_user_sdma_num_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } -/* - * Truncate length to page boundary. - */ -static int qib_user_sdma_page_length(unsigned long addr, unsigned long len) -{ - const unsigned long offset = addr & ~PAGE_MASK; - - return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len; -} - static void qib_user_sdma_free_pkt_frag(struct device *dev, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, @@ -256,10 +544,11 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, const int i = frag; if (pkt->addr[i].page) { + /* only user data has page */ if (pkt->addr[i].dma_mapped) dma_unmap_page(dev, pkt->addr[i].addr, - pkt->addr[i].length, + pkt->addr[i].dma_length, DMA_TO_DEVICE); if (pkt->addr[i].kvaddr) @@ -269,55 +558,81 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, put_page(pkt->addr[i].page); else __free_page(pkt->addr[i].page); - } else if (pkt->addr[i].kvaddr) - /* free coherent mem from cache... */ - dma_pool_free(pq->header_cache, + } else if (pkt->addr[i].kvaddr) { + /* for headers */ + if (pkt->addr[i].dma_mapped) { + /* from kmalloc & dma mapped */ + dma_unmap_single(dev, + pkt->addr[i].addr, + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + kfree(pkt->addr[i].kvaddr); + } else if (pkt->addr[i].addr) { + /* free coherent mem from cache... */ + dma_pool_free(pq->header_cache, pkt->addr[i].kvaddr, pkt->addr[i].addr); + } else { + /* from kmalloc but not dma mapped */ + kfree(pkt->addr[i].kvaddr); + } + } } /* return number of pages pinned... */ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, unsigned long addr, int tlen, int npages) { - struct page *pages[2]; - int j; - int ret; - - ret = get_user_pages(current, current->mm, addr, - npages, 0, 1, pages, NULL); - - if (ret != npages) { - int i; - - for (i = 0; i < ret; i++) - put_page(pages[i]); - - ret = -ENOMEM; - goto done; - } + struct page *pages[8]; + int i, j; + int ret = 0; - for (j = 0; j < npages; j++) { - /* map the pages... */ - const int flen = qib_user_sdma_page_length(addr, tlen); - dma_addr_t dma_addr = - dma_map_page(&dd->pcidev->dev, - pages[j], 0, flen, DMA_TO_DEVICE); - unsigned long fofs = addr & ~PAGE_MASK; + while (npages) { + if (npages > 8) + j = 8; + else + j = npages; - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + ret = get_user_pages(current, current->mm, addr, + j, 0, 1, pages, NULL); + if (ret != j) { + i = 0; + j = ret; ret = -ENOMEM; - goto done; + goto free_pages; } - qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1, - pages[j], kmap(pages[j]), dma_addr); + for (i = 0; i < j; i++) { + /* map the pages... */ + unsigned long fofs = addr & ~PAGE_MASK; + int flen = ((fofs + tlen) > PAGE_SIZE) ? + (PAGE_SIZE - fofs) : tlen; + + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + pages[i], 1, fofs, flen, NULL); + if (ret < 0) { + /* current page has beed taken + * care of inside above call. + */ + i++; + goto free_pages; + } - pkt->naddr++; - addr += flen; - tlen -= flen; + addr += flen; + tlen -= flen; + } + + npages -= j; } + goto done; + + /* if error, return all pages not managed by pkt */ +free_pages: + while (i < j) + put_page(pages[i++]); + done: return ret; } @@ -335,7 +650,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, const int npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; - ret = qib_user_sdma_pin_pages(dd, pkt, addr, + ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, iov[idx].iov_len, npages); if (ret < 0) goto free_pkt; @@ -344,9 +659,22 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, goto done; free_pkt: - for (idx = 0; idx < pkt->naddr; idx++) + /* we need to ignore the first entry here */ + for (idx = 1; idx < pkt->naddr; idx++) qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); + /* need to dma unmap the first entry, this is to restore to + * the original state so that caller can free the memory in + * error condition. Caller does not know if dma mapped or not*/ + if (pkt->addr[0].dma_mapped) { + dma_unmap_single(&dd->pcidev->dev, + pkt->addr[0].addr, + pkt->addr[0].dma_length, + DMA_TO_DEVICE); + pkt->addr[0].addr = 0; + pkt->addr[0].dma_mapped = 0; + } + done: return ret; } @@ -359,8 +687,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd, { int ret = 0; - if (npages >= ARRAY_SIZE(pkt->addr)) - ret = qib_user_sdma_coalesce(dd, pkt, iov, niov); + if (pkt->frag_size == pkt->bytes_togo && + npages >= ARRAY_SIZE(pkt->addr)) + ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov); else ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); @@ -380,7 +709,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, for (i = 0; i < pkt->naddr; i++) qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); - kmem_cache_free(pq->pkt_slab, pkt); + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); } INIT_LIST_HEAD(list); } @@ -393,63 +725,48 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, * as, if there is an error we clean it... */ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, + struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq, - struct list_head *list, const struct iovec *iov, unsigned long niov, - int maxpkts) + struct list_head *list, + int *maxpkts, int *ndesc) { unsigned long idx = 0; int ret = 0; int npkts = 0; - struct page *page = NULL; __le32 *pbc; dma_addr_t dma_addr; struct qib_user_sdma_pkt *pkt = NULL; size_t len; size_t nw; u32 counter = pq->counter; - int dma_mapped = 0; + u16 frag_size; - while (idx < niov && npkts < maxpkts) { + while (idx < niov && npkts < *maxpkts) { const unsigned long addr = (unsigned long) iov[idx].iov_base; const unsigned long idx_save = idx; unsigned pktnw; unsigned pktnwc; int nfrags = 0; int npages = 0; + int bytes_togo = 0; + int tiddma = 0; int cfur; - dma_mapped = 0; len = iov[idx].iov_len; nw = len >> 2; - page = NULL; - - pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); - if (!pkt) { - ret = -ENOMEM; - goto free_list; - } if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH || len > PAGE_SIZE || len & 3 || addr & 3) { ret = -EINVAL; - goto free_pkt; + goto free_list; } - if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) - pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL, - &dma_addr); - else - pbc = NULL; - + pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr); if (!pbc) { - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto free_pkt; - } - pbc = kmap(page); + ret = -ENOMEM; + goto free_list; } cfur = copy_from_user(pbc, iov[idx].iov_base, len); @@ -474,8 +791,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, * we can verify that the packet is consistent with the * iovec lengths. */ - pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK; - if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) { + pktnw = le32_to_cpu(*pbc) & 0xFFFF; + if (pktnw < pktnwc) { ret = -EINVAL; goto free_pbc; } @@ -486,17 +803,14 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, const unsigned long faddr = (unsigned long) iov[idx].iov_base; - if (slen & 3 || faddr & 3 || !slen || - slen > PAGE_SIZE) { + if (slen & 3 || faddr & 3 || !slen) { ret = -EINVAL; goto free_pbc; } - npages++; - if ((faddr & PAGE_MASK) != - ((faddr + slen - 1) & PAGE_MASK)) - npages++; + npages += qib_user_sdma_num_pages(&iov[idx]); + bytes_togo += slen; pktnwc += slen >> 2; idx++; nfrags++; @@ -507,48 +821,139 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, goto free_pbc; } - if (page) { - dma_addr = dma_map_page(&dd->pcidev->dev, - page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF; + if (((frag_size ? frag_size : bytes_togo) + len) > + ppd->ibmaxlen) { + ret = -EINVAL; + goto free_pbc; + } + + if (frag_size) { + int pktsize, tidsmsize, n; + + n = npages*((2*PAGE_SIZE/frag_size)+1); + pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n; + + /* + * Determine if this is tid-sdma or just sdma. + */ + tiddma = (((le32_to_cpu(pbc[7])>> + QLOGIC_IB_I_TID_SHIFT)& + QLOGIC_IB_I_TID_MASK) != + QLOGIC_IB_I_TID_MASK); + + if (tiddma) + tidsmsize = iov[idx].iov_len; + else + tidsmsize = 0; + + pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (!pkt) { ret = -ENOMEM; goto free_pbc; } + pkt->largepkt = 1; + pkt->frag_size = frag_size; + pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + + if (tiddma) { + char *tidsm = (char *)pkt + pktsize; + cfur = copy_from_user(tidsm, + iov[idx].iov_base, tidsmsize); + if (cfur) { + ret = -EFAULT; + goto free_pkt; + } + pkt->tidsm = + (struct qib_tid_session_member *)tidsm; + pkt->tidsmcount = tidsmsize/ + sizeof(struct qib_tid_session_member); + pkt->tidsmidx = 0; + idx++; + } - dma_mapped = 1; + /* + * pbc 'fill1' field is borrowed to pass frag size, + * we need to clear it after picking frag size, the + * hardware requires this field to be zero. + */ + *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); + } else { + pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_pbc; + } + pkt->largepkt = 0; + pkt->frag_size = bytes_togo; + pkt->addrlimit = ARRAY_SIZE(pkt->addr); } - - qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped, - page, pbc, dma_addr); + pkt->bytes_togo = bytes_togo; + pkt->payload_size = 0; + pkt->counter = counter; + pkt->tiddma = tiddma; + + /* setup the first header */ + qib_user_sdma_init_frag(pkt, 0, /* index */ + 0, len, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbc, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->index = 0; + pkt->naddr = 1; if (nfrags) { ret = qib_user_sdma_init_payload(dd, pq, pkt, iov + idx_save + 1, nfrags, npages); if (ret < 0) - goto free_pbc_dma; + goto free_pkt; + } else { + /* since there is no payload, mark the + * header as the last desc. */ + pkt->addr[0].last_desc = 1; + + if (dma_addr == 0) { + /* + * the header is not dma mapped yet. + * it should be from kmalloc. + */ + dma_addr = dma_map_single(&dd->pcidev->dev, + pbc, len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + dma_addr)) { + ret = -ENOMEM; + goto free_pkt; + } + pkt->addr[0].addr = dma_addr; + pkt->addr[0].dma_mapped = 1; + } } counter++; npkts++; + pkt->pq = pq; + pkt->index = 0; /* reset index for push on hw */ + *ndesc += pkt->naddr; list_add_tail(&pkt->list, list); } + *maxpkts = npkts; ret = idx; goto done; -free_pbc_dma: - if (dma_mapped) - dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE); +free_pkt: + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); free_pbc: - if (page) { - kunmap(page); - __free_page(page); - } else + if (dma_addr) dma_pool_free(pq->header_cache, pbc, dma_addr); -free_pkt: - kmem_cache_free(pq->pkt_slab, pkt); + else + kfree(pbc); free_list: qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); done: @@ -569,10 +974,20 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, struct list_head free_list; struct qib_user_sdma_pkt *pkt; struct qib_user_sdma_pkt *pkt_prev; + unsigned long flags; int ret = 0; + if (!pq->num_sending) + return 0; + INIT_LIST_HEAD(&free_list); + /* + * We need this spin lock here because interrupt handler + * might modify this list in qib_user_sdma_send_desc(), also + * we can not get interrupted, otherwise it is a deadlock. + */ + spin_lock_irqsave(&pq->sent_lock, flags); list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { s64 descd = ppd->sdma_descq_removed - pkt->added; @@ -583,7 +998,9 @@ static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, /* one more packet cleaned */ ret++; + pq->num_sending--; } + spin_unlock_irqrestore(&pq->sent_lock, flags); if (!list_empty(&free_list)) { u32 counter; @@ -627,6 +1044,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq) { struct qib_devdata *dd = ppd->dd; + unsigned long flags; int i; if (!pq) @@ -634,7 +1052,7 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) { mutex_lock(&pq->lock); - if (list_empty(&pq->sent)) { + if (!pq->num_pending && !pq->num_sending) { mutex_unlock(&pq->lock); break; } @@ -644,29 +1062,44 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, msleep(10); } - if (!list_empty(&pq->sent)) { + if (pq->num_pending || pq->num_sending) { + struct qib_user_sdma_pkt *pkt; + struct qib_user_sdma_pkt *pkt_prev; struct list_head free_list; + mutex_lock(&pq->lock); + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* + * Since we hold sdma_lock, it is safe without sent_lock. + */ + if (pq->num_pending) { + list_for_each_entry_safe(pkt, pkt_prev, + &ppd->sdma_userpending, list) { + if (pkt->pq == pq) { + list_move_tail(&pkt->list, &pq->sent); + pq->num_pending--; + pq->num_sending++; + } + } + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + qib_dev_err(dd, "user sdma lists not empty: forcing!\n"); INIT_LIST_HEAD(&free_list); - mutex_lock(&pq->lock); list_splice_init(&pq->sent, &free_list); + pq->num_sending = 0; qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); mutex_unlock(&pq->lock); } } -static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd, +static inline __le64 qib_sdma_make_desc0(u8 gen, u64 addr, u64 dwlen, u64 dwoffset) { - u8 tmpgen; - - tmpgen = ppd->sdma_generation; - return cpu_to_le64(/* SDmaPhyAddr[31:0] */ ((addr & 0xfffffffcULL) << 32) | /* SDmaGeneration[1:0] */ - ((tmpgen & 3ULL) << 30) | + ((gen & 3ULL) << 30) | /* SDmaDwordCount[10:0] */ ((dwlen & 0x7ffULL) << 16) | /* SDmaBufOffset[12:2] */ @@ -692,7 +1125,7 @@ static inline __le64 qib_sdma_make_desc1(u64 addr) static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, struct qib_user_sdma_pkt *pkt, int idx, - unsigned ofs, u16 tail) + unsigned ofs, u16 tail, u8 gen) { const u64 addr = (u64) pkt->addr[idx].addr + (u64) pkt->addr[idx].offset; @@ -702,104 +1135,132 @@ static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, descqp = &ppd->sdma_descq[tail].qw[0]; - descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs); - if (idx == 0) + descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs); + if (pkt->addr[idx].first_desc) descq0 = qib_sdma_make_first_desc0(descq0); - if (idx == pkt->naddr - 1) + if (pkt->addr[idx].last_desc) { descq0 = qib_sdma_make_last_desc0(descq0); + if (ppd->sdma_intrequest) { + descq0 |= cpu_to_le64(1ULL << 15); + ppd->sdma_intrequest = 0; + } + } descqp[0] = descq0; descqp[1] = qib_sdma_make_desc1(addr); } -/* pq->lock must be held, get packets on the wire... */ -static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, - struct qib_user_sdma_queue *pq, - struct list_head *pktlist) +void qib_user_sdma_send_desc(struct qib_pportdata *ppd, + struct list_head *pktlist) { struct qib_devdata *dd = ppd->dd; - int ret = 0; - unsigned long flags; - u16 tail; - u8 generation; - u64 descq_added; - - if (list_empty(pktlist)) - return 0; + u16 nfree, nsent; + u16 tail, tail_c; + u8 gen, gen_c; - if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) - return -ECOMM; - - spin_lock_irqsave(&ppd->sdma_lock, flags); - - /* keep a copy for restoring purposes in case of problems */ - generation = ppd->sdma_generation; - descq_added = ppd->sdma_descq_added; - - if (unlikely(!__qib_sdma_running(ppd))) { - ret = -ECOMM; - goto unlock; - } + nfree = qib_sdma_descq_freecnt(ppd); + if (!nfree) + return; - tail = ppd->sdma_descq_tail; +retry: + nsent = 0; + tail_c = tail = ppd->sdma_descq_tail; + gen_c = gen = ppd->sdma_generation; while (!list_empty(pktlist)) { struct qib_user_sdma_pkt *pkt = list_entry(pktlist->next, struct qib_user_sdma_pkt, list); - int i; + int i, j, c = 0; unsigned ofs = 0; u16 dtail = tail; - if (pkt->naddr > qib_sdma_descq_freecnt(ppd)) - goto unlock_check_tail; - - for (i = 0; i < pkt->naddr; i++) { - qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail); + for (i = pkt->index; i < pkt->naddr && nfree; i++) { + qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen); ofs += pkt->addr[i].length >> 2; if (++tail == ppd->sdma_descq_cnt) { tail = 0; - ++ppd->sdma_generation; + ++gen; + ppd->sdma_intrequest = 1; + } else if (tail == (ppd->sdma_descq_cnt>>1)) { + ppd->sdma_intrequest = 1; } - } + nfree--; + if (pkt->addr[i].last_desc == 0) + continue; - if ((ofs << 2) > ppd->ibmaxlen) { - ret = -EMSGSIZE; - goto unlock; - } - - /* - * If the packet is >= 2KB mtu equivalent, we have to use - * the large buffers, and have to mark each descriptor as - * part of a large buffer packet. - */ - if (ofs > dd->piosize2kmax_dwords) { - for (i = 0; i < pkt->naddr; i++) { - ppd->sdma_descq[dtail].qw[0] |= - cpu_to_le64(1ULL << 14); - if (++dtail == ppd->sdma_descq_cnt) - dtail = 0; + /* + * If the packet is >= 2KB mtu equivalent, we + * have to use the large buffers, and have to + * mark each descriptor as part of a large + * buffer packet. + */ + if (ofs > dd->piosize2kmax_dwords) { + for (j = pkt->index; j <= i; j++) { + ppd->sdma_descq[dtail].qw[0] |= + cpu_to_le64(1ULL << 14); + if (++dtail == ppd->sdma_descq_cnt) + dtail = 0; + } } + c += i + 1 - pkt->index; + pkt->index = i + 1; /* index for next first */ + tail_c = dtail = tail; + gen_c = gen; + ofs = 0; /* reset for next packet */ } - ppd->sdma_descq_added += pkt->naddr; - pkt->added = ppd->sdma_descq_added; - list_move_tail(&pkt->list, &pq->sent); - ret++; + ppd->sdma_descq_added += c; + nsent += c; + if (pkt->index == pkt->naddr) { + pkt->added = ppd->sdma_descq_added; + pkt->pq->added = pkt->added; + pkt->pq->num_pending--; + spin_lock(&pkt->pq->sent_lock); + pkt->pq->num_sending++; + list_move_tail(&pkt->list, &pkt->pq->sent); + spin_unlock(&pkt->pq->sent_lock); + } + if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt) + break; } -unlock_check_tail: /* advance the tail on the chip if necessary */ - if (ppd->sdma_descq_tail != tail) - dd->f_sdma_update_tail(ppd, tail); + if (ppd->sdma_descq_tail != tail_c) { + ppd->sdma_generation = gen_c; + dd->f_sdma_update_tail(ppd, tail_c); + } -unlock: - if (unlikely(ret < 0)) { - ppd->sdma_generation = generation; - ppd->sdma_descq_added = descq_added; + if (nfree && !list_empty(pktlist)) + goto retry; + + return; +} + +/* pq->lock must be held, get packets on the wire... */ +static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + struct list_head *pktlist, int count) +{ + int ret = 0; + unsigned long flags; + + if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) + return -ECOMM; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + if (unlikely(!__qib_sdma_running(ppd))) { + ret = -ECOMM; + goto unlock; } - spin_unlock_irqrestore(&ppd->sdma_lock, flags); + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + +unlock: + spin_unlock_irqrestore(&ppd->sdma_lock, flags); return ret; } @@ -822,19 +1283,23 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, if (!qib_sdma_running(ppd)) goto done_unlock; - if (ppd->sdma_descq_added != ppd->sdma_descq_removed) { + /* if I have packets not complete yet */ + if (pq->added > ppd->sdma_descq_removed) qib_user_sdma_hwqueue_clean(ppd); + /* if I have complete packets to be freed */ + if (pq->num_sending) qib_user_sdma_queue_clean(ppd, pq); - } while (dim) { - const int mxp = 8; + int mxp = 8; + int ndesc = 0; down_write(¤t->mm->mmap_sem); - ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); + ret = qib_user_sdma_queue_pkts(dd, ppd, pq, + iov, dim, &list, &mxp, &ndesc); up_write(¤t->mm->mmap_sem); - if (ret <= 0) + if (ret < 0) goto done_unlock; else { dim -= ret; @@ -844,24 +1309,20 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, /* force packets onto the sdma hw queue... */ if (!list_empty(&list)) { /* - * Lazily clean hw queue. the 4 is a guess of about - * how many sdma descriptors a packet will take (it - * doesn't have to be perfect). + * Lazily clean hw queue. */ - if (qib_sdma_descq_freecnt(ppd) < ret * 4) { + if (qib_sdma_descq_freecnt(ppd) < ndesc) { qib_user_sdma_hwqueue_clean(ppd); - qib_user_sdma_queue_clean(ppd, pq); + if (pq->num_sending) + qib_user_sdma_queue_clean(ppd, pq); } - ret = qib_user_sdma_push_pkts(ppd, pq, &list); + ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp); if (ret < 0) goto done_unlock; else { - npkts += ret; - pq->counter += ret; - - if (!list_empty(&list)) - goto done_unlock; + npkts += mxp; + pq->counter += mxp; } } } -- cgit v1.2.3-70-g09d2 From bea25e82c61fdf693949178594ee58aced72927d Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 7 Aug 2013 20:47:38 +0200 Subject: IB/qib: Make qib_driver static struct pci_driver qib_driver is only used in qib_init.c. Remove it from qib.h and make it static in qib_init.c. Signed-off-by: Paul Bolle Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 1 - drivers/infiniband/hw/qib/qib_init.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index ae3e4feca71..1946101419a 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -89,7 +89,6 @@ struct qlogic_ib_stats { extern struct qlogic_ib_stats qib_stats; extern const struct pci_error_handlers qib_pci_err_handler; -extern struct pci_driver qib_driver; #define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ /* diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 36e048e0e1d..24e802f4ea2 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1193,7 +1193,7 @@ static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { MODULE_DEVICE_TABLE(pci, qib_pci_tbl); -struct pci_driver qib_driver = { +static struct pci_driver qib_driver = { .name = QIB_DRV_NAME, .probe = qib_init_one, .remove = qib_remove_one, -- cgit v1.2.3-70-g09d2 From b29b0763949de035fd9341b70f869bd6f400ea4e Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 8 Aug 2013 21:11:56 +0800 Subject: IB/qib: Clean up unnecessary MSI/MSI-X capability find PCI core will initialize device MSI/MSI-X capability in pci_msi_init_pci_dev(). So device drivers should use pci_dev->msi_cap/msix_cap to determine whether a device supports MSI/MSI-X instead of using pci_find_capability(pci_dev, PCI_CAP_ID_MSI/MSIX). Access to PCIe device config space again will consume more time. Signed-off-by: Yijing Wang Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_pcie.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index c574ec7c85e..3f14009fb66 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -283,12 +283,12 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, goto bail; } - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX); + pos = dd->pcidev->msix_cap; if (nent && *nent && pos) { qib_msix_setup(dd, pos, nent, entry); ret = 0; /* did it, either MSIx or INTx */ } else { - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (pos) ret = qib_msi_setup(dd, pos); else @@ -357,7 +357,7 @@ int qib_reinit_intr(struct qib_devdata *dd) if (!dd->msi_lo) goto bail; - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); + pos = dd->pcidev->msi_cap; if (!pos) { qib_dev_err(dd, "Can't find MSI capability, can't restore MSI settings\n"); @@ -426,7 +426,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, PCI_COMMAND, new); - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); + pos = pdev->msi_cap; if (pos) { /* then turn off MSI */ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); @@ -434,7 +434,7 @@ void qib_enable_intx(struct pci_dev *pdev) if (new != cw) pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); } - pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); + pos = pdev->msix_cap; if (pos) { /* then turn off MSIx */ pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); -- cgit v1.2.3-70-g09d2 From 73c40c616a33fcb7961b3c90a91b550813129b3e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2013 18:49:53 +0300 Subject: IB/core: Add locking around event dispatching on XRC target QPs Fix a potential race when event occurrs on a target XRC QP and in the middle of reporting that on its shared qps, one of them is destroyed by user space application. Also add note for kernel consumers in ib_verbs.h that they must not destroy the QP from within the handler. Signed-off-by: Yishai Hadas Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 3 +++ include/rdma/ib_verbs.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 22192deb882..077fd641b30 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -346,10 +346,13 @@ EXPORT_SYMBOL(ib_destroy_srq); static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = context; + unsigned long flags; + spin_lock_irqsave(&qp->device->event_handler_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); + spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 645c3cedce9..a84d3dfc407 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -635,6 +635,12 @@ enum ib_qp_create_flags { IB_QP_CREATE_RESERVED_END = 1 << 31, }; + +/* + * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler + * callback to destroy the passed in QP. + */ + struct ib_qp_init_attr { void (*event_handler)(struct ib_event *, void *); void *qp_context; -- cgit v1.2.3-70-g09d2 From 846be90d810c285f6474f53abf1f928e1113830e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2013 18:49:54 +0300 Subject: IB/core: Fixes to XRC reference counting in uverbs Added reference counting mechanism for XRC target QPs between ib_uqp_object and its ib_uxrcd_object. This prevents closing an XRC domain that is still attached to a QP. In addition, add missing code in ib_uverbs_destroy_srq() to handle ib_uxrcd_object reference counting correctly when destroying an xsrq. Signed-off-by: Yishai Hadas Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 0fcd7aa26fa..b8431d64efe 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -135,6 +135,7 @@ struct ib_usrq_object { struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; + struct ib_uxrcd_object *uxrcd; }; struct ib_ucq_object { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b3c07b0c9f2..b1051405d41 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1526,7 +1526,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); + obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; @@ -1642,8 +1642,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_copy; } - if (xrcd) + if (xrcd) { + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, + uobject); + atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); + } + if (pd) put_pd_read(pd); if (scq) @@ -1753,6 +1758,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, goto err_remove; } + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); mutex_lock(&file->mutex); @@ -2019,6 +2026,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, if (ret) return ret; + if (obj->uxrcd) + atomic_dec(&obj->uxrcd->refcnt); + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); @@ -2860,6 +2870,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; + struct ib_usrq_object *us; + enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -2869,6 +2881,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return -EINVAL; srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); + srq_type = srq->srq_type; ret = ib_destroy_srq(srq); if (!ret) @@ -2879,6 +2892,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (ret) return ret; + if (srq_type == IB_SRQT_XRC) { + us = container_of(obj, struct ib_usrq_object, uevent); + atomic_dec(&us->uxrcd->refcnt); + } + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); -- cgit v1.2.3-70-g09d2 From 49b8e74438062aba379ce5d3f91b9e6e2e9c6745 Mon Sep 17 00:00:00 2001 From: Jim Foraker Date: Thu, 8 Aug 2013 17:44:22 -0700 Subject: IPoIB: Fix race in deleting ipoib_neigh entries In several places, this snippet is used when removing neigh entries: list_del(&neigh->list); ipoib_neigh_free(neigh); The list_del() removes neigh from the associated struct ipoib_path, while ipoib_neigh_free() removes neigh from the device's neigh entry lookup table. Both of these operations are protected by the priv->lock spinlock. The table however is also protected via RCU, and so naturally the lock is not held when doing reads. This leads to a race condition, in which a thread may successfully look up a neigh entry that has already been deleted from neigh->list. Since the previous deletion will have marked the entry with poison, a second list_del() on the object will cause a panic: #5 [ffff8802338c3c70] general_protection at ffffffff815108c5 [exception RIP: list_del+16] RIP: ffffffff81289020 RSP: ffff8802338c3d20 RFLAGS: 00010082 RAX: dead000000200200 RBX: ffff880433e60c88 RCX: 0000000000009e6c RDX: 0000000000000246 RSI: ffff8806012ca298 RDI: ffff880433e60c88 RBP: ffff8802338c3d30 R8: ffff8806012ca2e8 R9: 00000000ffffffff R10: 0000000000000001 R11: 0000000000000000 R12: ffff8804346b2020 R13: ffff88032a3e7540 R14: ffff8804346b26e0 R15: 0000000000000246 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 #6 [ffff8802338c3d38] ipoib_cm_tx_handler at ffffffffa066fe0a [ib_ipoib] #7 [ffff8802338c3d98] cm_process_work at ffffffffa05149a7 [ib_cm] #8 [ffff8802338c3de8] cm_work_handler at ffffffffa05161aa [ib_cm] #9 [ffff8802338c3e38] worker_thread at ffffffff81090e10 #10 [ffff8802338c3ee8] kthread at ffffffff81096c66 #11 [ffff8802338c3f48] kernel_thread at ffffffff8100c0ca We move the list_del() into ipoib_neigh_free(), so that deletion happens only once, after the entry has been successfully removed from the lookup table. This same behavior is already used in ipoib_del_neighs_by_gid() and __ipoib_reap_neigh(). Signed-off-by: Jim Foraker Reviewed-by: Or Gerlitz Reviewed-by: Jack Wang Reviewed-by: Shlomo Pongratz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 9 +++------ 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 3eceb61e353..7a3175400b2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -817,7 +817,6 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); ipoib_neigh_free(neigh); tx->neigh = NULL; @@ -1234,7 +1233,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); ipoib_neigh_free(neigh); tx->neigh = NULL; @@ -1325,7 +1323,6 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh = p->neigh; if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); ipoib_neigh_free(neigh); } list_del(&p->list); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index c6f71a88c55..82cec1af902 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -493,7 +493,6 @@ static void path_rec_completion(int status, path, neigh)); if (!ipoib_cm_get(neigh)) { - list_del(&neigh->list); ipoib_neigh_free(neigh); continue; } @@ -618,7 +617,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, if (!ipoib_cm_get(neigh)) ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); if (!ipoib_cm_get(neigh)) { - list_del(&neigh->list); ipoib_neigh_free(neigh); goto err_drop; } @@ -639,7 +637,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, neigh->ah = NULL; if (!path->query && path_rec_start(dev, path)) - goto err_list; + goto err_path; __skb_queue_tail(&neigh->queue, skb); } @@ -648,9 +646,6 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, ipoib_neigh_put(neigh); return; -err_list: - list_del(&neigh->list); - err_path: ipoib_neigh_free(neigh); err_drop: @@ -1098,6 +1093,8 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); + /* remove from parent list */ + list_del(&neigh->list); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); return; } else { -- cgit v1.2.3-70-g09d2 From 319a441d1361ea703b091caf92418f8121eadfc5 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 7 Aug 2013 14:01:59 +0300 Subject: IB/core: Add receive flow steering support The RDMA stack allows for applications to create IB_QPT_RAW_PACKET QPs, which receive plain Ethernet packets, specifically packets that don't carry any QPN to be matched by the receiving side. Applications using these QPs must be provided with a method to program some steering rule with the HW so packets arriving at the local port can be routed to them. This patch adds ib_create_flow(), which allow providing a flow specification for a QP. When there's a match between the specification and a received packet, the packet is forwarded to that QP, in a the same way one uses ib_attach_multicast() for IB UD multicast handling. Flow specifications are provided as instances of struct ib_flow_spec_yyy, which describe L2, L3 and L4 headers. Currently specs for Ethernet, IPv4, TCP and UDP are defined. Flow specs are made of values and masks. The input to ib_create_flow() is a struct ib_flow_attr, which contains a few mandatory control elements and optional flow specs. struct ib_flow_attr { enum ib_flow_attr_type type; u16 size; u16 priority; u32 flags; u8 num_of_specs; u8 port; /* Following are the optional layers according to user request * struct ib_flow_spec_yyy * struct ib_flow_spec_zzz */ }; As these specs are eventually coming from user space, they are defined and used in a way which allows adding new spec types without kernel/user ABI change, just with a little API enhancement which defines the newly added spec. The flow spec structures are defined with TLV (Type-Length-Value) entries, which allows calling ib_create_flow() with a list of variable length of optional specs. For the actual processing of ib_flow_attr the driver uses the number of specs and the size mandatory fields along with the TLV nature of the specs. Steering rules processing order is according to the domain over which the rule is set and the rule priority. All rules set by user space applicatations fall into the IB_FLOW_DOMAIN_USER domain, other domains could be used by future IPoIB RFS and Ethetool flow-steering interface implementation. Lower numerical value for the priority field means higher priority. The returned value from ib_create_flow() is a struct ib_flow, which contains a database pointer (handle) provided by the HW driver to be used when calling ib_destroy_flow(). Applications that offload TCP/IP traffic can also be written over IB UD QPs. The ib_create_flow() / ib_destroy_flow() API is designed to support UD QPs too. A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING to denote support for flow steering. The ib_flow_attr enum type supports usage of flow steering for promiscuous and sniffer purposes: IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive all Ethernet traffic which isn't steered to any QP IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 27 +++++++++ include/rdma/ib_verbs.h | 119 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 144 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 22192deb882..87a8102c9db 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1254,3 +1254,30 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) return xrcd->device->dealloc_xrcd(xrcd); } EXPORT_SYMBOL(ib_dealloc_xrcd); + +struct ib_flow *ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct ib_flow *flow_id; + if (!qp->device->create_flow) + return ERR_PTR(-ENOSYS); + + flow_id = qp->device->create_flow(qp, flow_attr, domain); + if (!IS_ERR(flow_id)) + atomic_inc(&qp->usecnt); + return flow_id; +} +EXPORT_SYMBOL(ib_create_flow); + +int ib_destroy_flow(struct ib_flow *flow_id) +{ + int err; + struct ib_qp *qp = flow_id->qp; + + err = qp->device->destroy_flow(flow_id); + if (!err) + atomic_dec(&qp->usecnt); + return err; +} +EXPORT_SYMBOL(ib_destroy_flow); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 645c3cedce9..6f874b00491 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -116,7 +116,8 @@ enum ib_device_cap_flags { IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24) + IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), + IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29) }; enum ib_atomic_cap { @@ -1033,7 +1034,8 @@ struct ib_qp { struct ib_srq *srq; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct list_head xrcd_list; - atomic_t usecnt; /* count times opened, mcast attaches */ + /* count times opened, mcast attaches, flow attaches */ + atomic_t usecnt; struct list_head open_list; struct ib_qp *real_qp; struct ib_uobject *uobject; @@ -1068,6 +1070,110 @@ struct ib_fmr { u32 rkey; }; +/* Supported steering options */ +enum ib_flow_attr_type { + /* steering according to rule specifications */ + IB_FLOW_ATTR_NORMAL = 0x0, + /* default unicast and multicast rule - + * receive all Eth traffic which isn't steered to any QP + */ + IB_FLOW_ATTR_ALL_DEFAULT = 0x1, + /* default multicast rule - + * receive all Eth multicast traffic which isn't steered to any QP + */ + IB_FLOW_ATTR_MC_DEFAULT = 0x2, + /* sniffer rule - receive all port traffic */ + IB_FLOW_ATTR_SNIFFER = 0x3 +}; + +/* Supported steering header types */ +enum ib_flow_spec_type { + /* L2 headers*/ + IB_FLOW_SPEC_ETH = 0x20, + /* L3 header*/ + IB_FLOW_SPEC_IPV4 = 0x30, + /* L4 headers*/ + IB_FLOW_SPEC_TCP = 0x40, + IB_FLOW_SPEC_UDP = 0x41 +}; + +/* Flow steering rule priority is set according to it's domain. + * Lower domain value means higher priority. + */ +enum ib_flow_domain { + IB_FLOW_DOMAIN_USER, + IB_FLOW_DOMAIN_ETHTOOL, + IB_FLOW_DOMAIN_RFS, + IB_FLOW_DOMAIN_NIC, + IB_FLOW_DOMAIN_NUM /* Must be last */ +}; + +struct ib_flow_eth_filter { + u8 dst_mac[6]; + u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; +}; + +struct ib_flow_spec_eth { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_eth_filter val; + struct ib_flow_eth_filter mask; +}; + +struct ib_flow_ipv4_filter { + __be32 src_ip; + __be32 dst_ip; +}; + +struct ib_flow_spec_ipv4 { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ipv4_filter val; + struct ib_flow_ipv4_filter mask; +}; + +struct ib_flow_tcp_udp_filter { + __be16 dst_port; + __be16 src_port; +}; + +struct ib_flow_spec_tcp_udp { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_tcp_udp_filter val; + struct ib_flow_tcp_udp_filter mask; +}; + +union ib_flow_spec { + struct { + enum ib_flow_spec_type type; + u16 size; + }; + struct ib_flow_spec_eth eth; + struct ib_flow_spec_ipv4 ipv4; + struct ib_flow_spec_tcp_udp tcp_udp; +}; + +struct ib_flow_attr { + enum ib_flow_attr_type type; + u16 size; + u16 priority; + u32 flags; + u8 num_of_specs; + u8 port; + /* Following are the optional layers according to user request + * struct ib_flow_spec_xxx + * struct ib_flow_spec_yyy + */ +}; + +struct ib_flow { + struct ib_qp *qp; + struct ib_uobject *uobject; +}; + struct ib_mad; struct ib_grh; @@ -1300,6 +1406,11 @@ struct ib_device { struct ib_ucontext *ucontext, struct ib_udata *udata); int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + struct ib_flow * (*create_flow)(struct ib_qp *qp, + struct ib_flow_attr + *flow_attr, + int domain); + int (*destroy_flow)(struct ib_flow *flow_id); struct ib_dma_mapping_ops *dma_ops; @@ -2260,4 +2371,8 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); +struct ib_flow *ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, int domain); +int ib_destroy_flow(struct ib_flow *flow_id); + #endif /* IB_VERBS_H */ -- cgit v1.2.3-70-g09d2 From 400dbc96583ff3b8ad4c09bd7e9dcd35a6215922 Mon Sep 17 00:00:00 2001 From: Igor Ivanov Date: Wed, 14 Aug 2013 13:58:29 +0300 Subject: IB/core: Infrastructure for extensible uverbs commands Add infrastructure to support extended uverbs capabilities in a forward/backward manner. Uverbs command opcodes which are based on the verbs extensions approach should be greater or equal to IB_USER_VERBS_CMD_THRESHOLD. They have new header format and processed a bit differently. Whenever a specific IB_USER_VERBS_CMD_XXX is extended, which practically means it needs to have additional arguments, we will be able to add them without creating a completely new IB_USER_VERBS_CMD_YYY command or bumping the uverbs ABI version. This patch for itself doesn't provide the whole scheme which is also dependent on adding a comp_mask field to each extended uverbs command struct. The new header framework allows for future extension of the CMD arguments (ib_uverbs_cmd_hdr.in_words, ib_uverbs_cmd_hdr.out_words) for an existing new command (that is a command that supports the new uverbs command header format suggested in this patch) w/o bumping ABI version and with maintaining backward and formward compatibility to new and old libibverbs versions. In the uverbs command we are passing both uverbs arguments and the provider arguments. We split the ib_uverbs_cmd_hdr.in_words to ib_uverbs_cmd_hdr.in_words which will now carry only uverbs input argument struct size and ib_uverbs_cmd_hdr.provider_in_words that will carry the provider input argument size. Same goes for the response (the uverbs CMD output argument). For example take the create_cq call and the mlx4_ib provider: The uverbs layer gets libibverb's struct ibv_create_cq (named struct ib_uverbs_create_cq in the kernel), mlx4_ib gets libmlx4's struct mlx4_create_cq (which includes struct ibv_create_cq and is named struct mlx4_ib_create_cq in the kernel) and in_words = sizeof(mlx4_create_cq)/4 . Thus ib_uverbs_cmd_hdr.in_words carry both uverbs plus mlx4_ib input argument sizes, where uverbs assumes it knows the size of its input argument - struct ibv_create_cq. Now, if we wish to add a variable to struct ibv_create_cq, we can add a comp_mask field to the struct which is basically bit field indicating which fields exists in the struct (as done for the libibverbs API extension), but we need a way to tell what is the total size of the struct and not assume the struct size is predefined (since we may get different struct sizes from different user libibverbs versions). So we know at which point the provider input argument (struct mlx4_create_cq) begins. Same goes for extending the provider struct mlx4_create_cq. Thus we split the ib_uverbs_cmd_hdr.in_words to ib_uverbs_cmd_hdr.in_words which will now carry only uverbs input argument struct size and ib_uverbs_cmd_hdr.provider_in_words that will carry the provider (mlx4_ib) input argument size. Signed-off-by: Igor Ivanov Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 29 ++++++++++++++++++++++++----- include/uapi/rdma/ib_user_verbs.h | 10 ++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2c6f0f2ecd9..e4e7b2449d1 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -583,9 +583,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; - if (hdr.in_words * 4 != count) - return -EINVAL; - if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[hdr.command]) return -EINVAL; @@ -597,8 +594,30 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -ENOSYS; - return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, - hdr.in_words * 4, hdr.out_words * 4); + if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) { + struct ib_uverbs_cmd_hdr_ex hdr_ex; + + if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex))) + return -EFAULT; + + if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count) + return -EINVAL; + + return uverbs_cmd_table[hdr.command](file, + buf + sizeof(hdr_ex), + (hdr_ex.in_words + + hdr_ex.provider_in_words) * 4, + (hdr_ex.out_words + + hdr_ex.provider_out_words) * 4); + } else { + if (hdr.in_words * 4 != count) + return -EINVAL; + + return uverbs_cmd_table[hdr.command](file, + buf + sizeof(hdr), + hdr.in_words * 4, + hdr.out_words * 4); + } } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 805711ea200..61535aa0a62 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -43,6 +43,7 @@ * compatibility are made. */ #define IB_USER_VERBS_ABI_VERSION 6 +#define IB_USER_VERBS_CMD_THRESHOLD 50 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -123,6 +124,15 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; +struct ib_uverbs_cmd_hdr_ex { + __u32 command; + __u16 in_words; + __u16 out_words; + __u16 provider_in_words; + __u16 provider_out_words; + __u32 cmd_hdr_reserved; +}; + struct ib_uverbs_get_context { __u64 response; __u64 driver_data[0]; -- cgit v1.2.3-70-g09d2 From 436f2ad05a0b65b1467ddf51bc68171c381bf844 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 14 Aug 2013 13:58:30 +0300 Subject: IB/core: Export ib_create/destroy_flow through uverbs Implement ib_uverbs_create_flow() and ib_uverbs_destroy_flow() to support flow steering for user space applications. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 3 + drivers/infiniband/core/uverbs_cmd.c | 214 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 13 ++- include/rdma/ib_verbs.h | 1 + include/uapi/rdma/ib_user_verbs.h | 89 +++++++++++++- 5 files changed, 318 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 0fcd7aa26fa..ad9d1028102 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -155,6 +155,7 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; +extern struct idr ib_uverbs_rule_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -215,5 +216,7 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); IB_UVERBS_DECLARE_CMD(close_xrcd); +IB_UVERBS_DECLARE_CMD(create_flow); +IB_UVERBS_DECLARE_CMD(destroy_flow); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b3c07b0c9f2..6e98df929e2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -54,6 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -330,6 +331,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->xrcd_list); + INIT_LIST_HEAD(&ucontext->rule_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -2587,6 +2589,218 @@ out_put: return ret ? ret : in_len; } +static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + ib_spec->type = kern_spec->type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); + if (ib_spec->eth.size != kern_spec->eth.size) + return -EINVAL; + memcpy(&ib_spec->eth.val, &kern_spec->eth.val, + sizeof(struct ib_flow_eth_filter)); + memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, + sizeof(struct ib_flow_eth_filter)); + break; + case IB_FLOW_SPEC_IPV4: + ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); + if (ib_spec->ipv4.size != kern_spec->ipv4.size) + return -EINVAL; + memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, + sizeof(struct ib_flow_ipv4_filter)); + memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, + sizeof(struct ib_flow_ipv4_filter)); + break; + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); + if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) + return -EINVAL; + memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, + sizeof(struct ib_flow_tcp_udp_filter)); + memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, + sizeof(struct ib_flow_tcp_udp_filter)); + break; + default: + return -EINVAL; + } + return 0; +} + +ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_flow cmd; + struct ib_uverbs_create_flow_resp resp; + struct ib_uobject *uobj; + struct ib_flow *flow_id; + struct ib_kern_flow_attr *kern_flow_attr; + struct ib_flow_attr *flow_attr; + struct ib_qp *qp; + int err = 0; + void *kern_spec; + void *ib_spec; + int i; + int kern_attr_size; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && + !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) + return -EPERM; + + if (cmd.flow_attr.num_of_specs) { + kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); + if (!kern_flow_attr) + return -ENOMEM; + + memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); + kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr_ex); + if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd), + kern_attr_size)) { + err = -EFAULT; + goto err_free_attr; + } + } else { + kern_flow_attr = &cmd.flow_attr; + kern_attr_size = sizeof(cmd.flow_attr); + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto err_free_attr; + } + init_uobj(uobj, 0, file->ucontext, &rule_lock_class); + down_write(&uobj->mutex); + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + err = -EINVAL; + goto err_uobj; + } + + flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); + if (!flow_attr) { + err = -ENOMEM; + goto err_put; + } + + flow_attr->type = kern_flow_attr->type; + flow_attr->priority = kern_flow_attr->priority; + flow_attr->num_of_specs = kern_flow_attr->num_of_specs; + flow_attr->port = kern_flow_attr->port; + flow_attr->flags = kern_flow_attr->flags; + flow_attr->size = sizeof(*flow_attr); + + kern_spec = kern_flow_attr + 1; + ib_spec = flow_attr + 1; + for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) { + err = kern_spec_to_ib_spec(kern_spec, ib_spec); + if (err) + goto err_free; + flow_attr->size += + ((union ib_flow_spec *) ib_spec)->size; + kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size; + kern_spec += ((struct ib_kern_spec *) kern_spec)->size; + ib_spec += ((union ib_flow_spec *) ib_spec)->size; + } + if (kern_attr_size) { + pr_warn("create flow failed, %d bytes left from uverb cmd\n", + kern_attr_size); + goto err_free; + } + flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + if (IS_ERR(flow_id)) { + err = PTR_ERR(flow_id); + goto err_free; + } + flow_id->qp = qp; + flow_id->uobject = uobj; + uobj->object = flow_id; + + err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); + if (err) + goto destroy_flow; + + memset(&resp, 0, sizeof(resp)); + resp.flow_handle = uobj->id; + + if (copy_to_user((void __user *)(unsigned long) cmd.response, + &resp, sizeof(resp))) { + err = -EFAULT; + goto err_copy; + } + + put_qp_read(qp); + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rule_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return in_len; +err_copy: + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +destroy_flow: + ib_destroy_flow(flow_id); +err_free: + kfree(flow_attr); +err_put: + put_qp_read(qp); +err_uobj: + put_uobj_write(uobj); +err_free_attr: + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return err; +} + +ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) { + struct ib_uverbs_destroy_flow cmd; + struct ib_flow *flow_id; + struct ib_uobject *uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + flow_id = uobj->object; + + ret = ib_destroy_flow(flow_id); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return ret ? ret : in_len; +} + static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e4e7b2449d1..75ad86c4abf 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -73,6 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); +DEFINE_IDR(ib_uverbs_rule_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -113,7 +114,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, + [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow, + [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow }; static void ib_uverbs_add_one(struct ib_device *device); @@ -212,6 +215,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { + struct ib_flow *flow_id = uobj->object; + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + ib_destroy_flow(flow_id); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6f874b00491..274205d4df9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -954,6 +954,7 @@ struct ib_ucontext { struct list_head srq_list; struct list_head ah_list; struct list_head xrcd_list; + struct list_head rule_list; int closing; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 61535aa0a62..0b233c56b0e 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -86,7 +86,9 @@ enum { IB_USER_VERBS_CMD_OPEN_XRCD, IB_USER_VERBS_CMD_CLOSE_XRCD, IB_USER_VERBS_CMD_CREATE_XSRQ, - IB_USER_VERBS_CMD_OPEN_QP + IB_USER_VERBS_CMD_OPEN_QP, + IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, + IB_USER_VERBS_CMD_DESTROY_FLOW }; /* @@ -694,6 +696,91 @@ struct ib_uverbs_detach_mcast { __u64 driver_data[0]; }; +struct ib_kern_eth_filter { + __u8 dst_mac[6]; + __u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; +}; + +struct ib_kern_spec_eth { + __u32 type; + __u16 size; + __u16 reserved; + struct ib_kern_eth_filter val; + struct ib_kern_eth_filter mask; +}; + +struct ib_kern_ipv4_filter { + __be32 src_ip; + __be32 dst_ip; +}; + +struct ib_kern_spec_ipv4 { + __u32 type; + __u16 size; + __u16 reserved; + struct ib_kern_ipv4_filter val; + struct ib_kern_ipv4_filter mask; +}; + +struct ib_kern_tcp_udp_filter { + __be16 dst_port; + __be16 src_port; +}; + +struct ib_kern_spec_tcp_udp { + __u32 type; + __u16 size; + __u16 reserved; + struct ib_kern_tcp_udp_filter val; + struct ib_kern_tcp_udp_filter mask; +}; + +struct ib_kern_spec { + union { + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + struct ib_kern_spec_eth eth; + struct ib_kern_spec_ipv4 ipv4; + struct ib_kern_spec_tcp_udp tcp_udp; + }; +}; + +struct ib_kern_flow_attr { + __u32 type; + __u16 size; + __u16 priority; + __u8 num_of_specs; + __u8 reserved[2]; + __u8 port; + __u32 flags; + /* Following are the optional layers according to user request + * struct ib_flow_spec_xxx + * struct ib_flow_spec_yyy + */ +}; + +struct ib_uverbs_create_flow { + __u32 comp_mask; + __u64 response; + __u32 qp_handle; + struct ib_kern_flow_attr flow_attr; +}; + +struct ib_uverbs_create_flow_resp { + __u32 comp_mask; + __u32 flow_handle; +}; + +struct ib_uverbs_destroy_flow { + __u32 comp_mask; + __u32 flow_handle; +}; + struct ib_uverbs_create_srq { __u64 response; __u64 user_handle; -- cgit v1.2.3-70-g09d2 From f77c0162a339400ad16f657603fdc3bf11654fd3 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 14 Aug 2013 13:58:31 +0300 Subject: IB/mlx4: Add receive flow steering support Implement ib_create_flow() and ib_destroy_flow(). Translate the verbs structures provided by the user to HW structures and call the MLX4_QP_FLOW_STEERING_ATTACH/DETACH firmware commands. On the ATTACH command completion, the firmware provides a 64-bit registration ID, which is placed into struct mlx4_ib_flow that wraps the instance of struct ib_flow which is retuned to caller. Later, this reg ID is used for detaching that flow from the firmware. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 235 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 12 ++ include/linux/mlx4/device.h | 5 - 3 files changed, 247 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index a188d317855..d6c5a73becf 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -54,6 +54,8 @@ #define DRV_VERSION "1.0" #define DRV_RELDATE "April 4, 2008" +#define MLX4_IB_FLOW_MAX_PRIO 0xFFF + MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); @@ -88,6 +90,25 @@ static void init_query_mad(struct ib_smp *mad) static union ib_gid zgid; +static int check_flow_steering_support(struct mlx4_dev *dev) +{ + int ib_num_ports = 0; + int i; + + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ib_num_ports++; + + if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { + if (ib_num_ports || mlx4_is_mfunc(dev)) { + pr_warn("Device managed flow steering is unavailable " + "for IB ports or in multifunction env.\n"); + return 0; + } + return 1; + } + return 0; +} + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -144,6 +165,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; else props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; + if (check_flow_steering_support(dev->dev)) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & @@ -798,6 +821,209 @@ struct mlx4_ib_steering { union ib_gid gid; }; +static int parse_flow_attr(struct mlx4_dev *dev, + union ib_flow_spec *ib_spec, + struct _rule_hw *mlx4_spec) +{ + enum mlx4_net_trans_rule_id type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + type = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, + ETH_ALEN); + memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac, + ETH_ALEN); + mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag; + mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; + break; + + case IB_FLOW_SPEC_IPV4: + type = MLX4_NET_TRANS_RULE_ID_IPV4; + mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; + mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; + mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip; + mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip; + break; + + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + type = ib_spec->type == IB_FLOW_SPEC_TCP ? + MLX4_NET_TRANS_RULE_ID_TCP : + MLX4_NET_TRANS_RULE_ID_UDP; + mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port; + mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port; + mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port; + mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port; + break; + + default: + return -EINVAL; + } + if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 || + mlx4_hw_rule_sz(dev, type) < 0) + return -EINVAL; + mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type)); + mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2; + return mlx4_hw_rule_sz(dev, type); +} + +static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, + int domain, + enum mlx4_net_trans_promisc_mode flow_type, + u64 *reg_id) +{ + int ret, i; + int size = 0; + void *ib_flow; + struct mlx4_ib_dev *mdev = to_mdev(qp->device); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_net_trans_rule_hw_ctrl *ctrl; + size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) + + (sizeof(struct _rule_hw) * flow_attr->num_of_specs); + + static const u16 __mlx4_domain[] = { + [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, + [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL, + [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS, + [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC, + }; + + if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) { + pr_err("Invalid priority value %d\n", flow_attr->priority); + return -EINVAL; + } + + if (domain >= IB_FLOW_DOMAIN_NUM) { + pr_err("Invalid domain value %d\n", domain); + return -EINVAL; + } + + if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0) + return -EINVAL; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, rule_size); + ctrl = mailbox->buf; + + ctrl->prio = cpu_to_be16(__mlx4_domain[domain] | + flow_attr->priority); + ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type); + ctrl->port = flow_attr->port; + ctrl->qpn = cpu_to_be32(qp->qp_num); + + ib_flow = flow_attr + 1; + size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); + for (i = 0; i < flow_attr->num_of_specs; i++) { + ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size); + if (ret < 0) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return -EINVAL; + } + ib_flow += ((union ib_flow_spec *) ib_flow)->size; + size += ret; + } + + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (ret == -ENOMEM) + pr_err("mcg table is full. Fail to register network rule.\n"); + else if (ret == -ENXIO) + pr_err("Device managed flow steering is disabled. Fail to register network rule.\n"); + else if (ret) + pr_err("Invalid argumant. Fail to register network rule.\n"); + + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return ret; +} + +static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) +{ + int err; + err = mlx4_cmd(dev, reg_id, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + pr_err("Fail to detach network rule. registration id = 0x%llx\n", + reg_id); + return err; +} + +static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + int err = 0, i = 0; + struct mlx4_ib_flow *mflow; + enum mlx4_net_trans_promisc_mode type[2]; + + memset(type, 0, sizeof(type)); + + mflow = kzalloc(sizeof(*mflow), GFP_KERNEL); + if (!mflow) { + err = -ENOMEM; + goto err_free; + } + + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: + type[0] = MLX4_FS_REGULAR; + break; + + case IB_FLOW_ATTR_ALL_DEFAULT: + type[0] = MLX4_FS_ALL_DEFAULT; + break; + + case IB_FLOW_ATTR_MC_DEFAULT: + type[0] = MLX4_FS_MC_DEFAULT; + break; + + case IB_FLOW_ATTR_SNIFFER: + type[0] = MLX4_FS_UC_SNIFFER; + type[1] = MLX4_FS_MC_SNIFFER; + break; + + default: + err = -EINVAL; + goto err_free; + } + + while (i < ARRAY_SIZE(type) && type[i]) { + err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], + &mflow->reg_id[i]); + if (err) + goto err_free; + i++; + } + + return &mflow->ibflow; + +err_free: + kfree(mflow); + return ERR_PTR(err); +} + +static int mlx4_ib_destroy_flow(struct ib_flow *flow_id) +{ + int err, ret = 0; + int i = 0; + struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device); + struct mlx4_ib_flow *mflow = to_mflow(flow_id); + + while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) { + err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]); + if (err) + ret = err; + i++; + } + + kfree(mflow); + return ret; +} + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; @@ -1461,6 +1687,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + if (check_flow_steering_support(dev)) { + ibdev->ib_dev.create_flow = mlx4_ib_create_flow; + ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; + + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW); + } + mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index f61ec26500c..036b663dd26 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -132,6 +132,12 @@ struct mlx4_ib_fmr { struct mlx4_fmr mfmr; }; +struct mlx4_ib_flow { + struct ib_flow ibflow; + /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */ + u64 reg_id[2]; +}; + struct mlx4_ib_wq { u64 *wrid; spinlock_t lock; @@ -552,6 +558,12 @@ static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); } + +static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow) +{ + return container_of(ibflow, struct mlx4_ib_flow, ibflow); +} + static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mlx4_ib_qp, ibqp); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 52c23a892ba..d73423c37c2 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1052,11 +1052,6 @@ struct _rule_hw { }; }; -/* translating DMFS verbs sniffer rule to the FW API would need two reg IDs */ -struct mlx4_flow_handle { - u64 reg_id[2]; -}; - int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, -- cgit v1.2.3-70-g09d2 From 22878dbc9173a7f0322dd697b1b5b49a83a1d4d5 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 1 Sep 2013 18:39:52 +0300 Subject: IB/core: Better checking of userspace values for receive flow steering - Don't allow unsupported comp_mask values, user should check ibv_query_device to know which features are supported. - Add a check in ib_uverbs_create_flow() to verify the size passed from the user space. Signed-off-by: Matan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 16 +++++++++++++++- include/rdma/ib_verbs.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6e98df929e2..9112410da11 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2652,17 +2652,31 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; + if (cmd.comp_mask) + return -EINVAL; + if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) return -EPERM; + if (cmd.flow_attr.num_of_specs < 0 || + cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) + return -EINVAL; + + kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - + sizeof(struct ib_uverbs_cmd_hdr_ex); + + if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len || + kern_attr_size < 0 || kern_attr_size > + (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec))) + return -EINVAL; + if (cmd.flow_attr.num_of_specs) { kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); if (!kern_flow_attr) return -ENOMEM; memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); - kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr_ex); if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd), kern_attr_size)) { err = -EFAULT; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 274205d4df9..bd151eac1e6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1098,6 +1098,8 @@ enum ib_flow_spec_type { IB_FLOW_SPEC_UDP = 0x41 }; +#define IB_FLOW_SPEC_SUPPORT_LAYERS 4 + /* Flow steering rule priority is set according to it's domain. * Lower domain value means higher priority. */ -- cgit v1.2.3-70-g09d2 From 43a6b4025c79ded5b44e58ba0db97c29dd38d718 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:38 +0530 Subject: RDMA/ocrdma: Create IRD queue fix 1) Fix ocrdma_get_num_posted_shift for upto 128 QPs. 2) Create for min of dev->max_wqe and requested wqe in create_qp. 3) As part of creating ird queue, populate with basic header templates. 4) Make sure all the DB memory allocated to userspace are page aligned. 5) Fix issue in checking the mmap local cache. 6) Some code cleanup. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 21 +++++-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 97 +++++++++++++++++------------ 3 files changed, 73 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index e798837f1fe..b4511668514 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -382,7 +382,7 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq) static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp) { return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY && - qp->id < 64) ? 24 : 16); + qp->id < 128) ? 24 : 16); } static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 97bb1ce8d24..31fd3ff1d1d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -259,6 +259,7 @@ static int ocrdma_get_mbx_cqe_errno(u16 cqe_status) err_num = -EAGAIN; break; case OCRDMA_MBX_CQE_STATUS_DMA_FAILED: + default: err_num = -EIO; break; } @@ -1340,8 +1341,6 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, struct ocrdma_create_cq_rsp *rsp; u32 hw_pages, cqe_size, page_size, cqe_count; - if (dpp_cq) - return -EINVAL; if (entries > dev->attr.max_cqe) { pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n", __func__, dev->id, dev->attr.max_cqe, entries); @@ -1735,10 +1734,9 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd, u32 max_wqe_allocated; u32 max_sges = attrs->cap.max_send_sge; - max_wqe_allocated = attrs->cap.max_send_wr; - /* need to allocate one extra to for GEN1 family */ - if (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY) - max_wqe_allocated += 1; + /* QP1 may exceed 127 */ + max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1, + dev->attr.max_wqe); status = ocrdma_build_q_conf(&max_wqe_allocated, dev->attr.wqe_size, &hw_pages, &hw_page_size); @@ -1850,6 +1848,8 @@ static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd, dma_addr_t pa = 0; int ird_page_size = dev->attr.ird_page_size; int ird_q_len = dev->attr.num_ird_pages * ird_page_size; + struct ocrdma_hdr_wqe *rqe; + int i = 0; if (dev->attr.ird == 0) return 0; @@ -1861,6 +1861,15 @@ static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd, memset(qp->ird_q_va, 0, ird_q_len); ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages, pa, ird_page_size); + for (; i < ird_q_len / dev->attr.rqe_size; i++) { + rqe = (struct ocrdma_hdr_wqe *)(qp->ird_q_va + + (i * dev->attr.rqe_size)); + rqe->cw = 0; + rqe->cw |= 2; + rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + rqe->cw |= (8 << OCRDMA_WQE_SIZE_SHIFT); + rqe->cw |= (8 << OCRDMA_WQE_NXT_WQE_SIZE_SHIFT); + } return 0; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 5f68dff0d6c..278b33b628e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -186,7 +186,7 @@ static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) + if (len != mm->key.len && phy_addr != mm->key.phy_addr) continue; list_del(&mm->entry); @@ -204,7 +204,7 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) + if (len != mm->key.len && phy_addr != mm->key.phy_addr) continue; found = true; @@ -307,7 +307,10 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + dev->nic_info.db_total_size)) && (len <= dev->nic_info.db_page_size)) { - /* doorbell mapping */ + if (vma->vm_flags & VM_READ) + return -EPERM; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, vma->vm_page_prot); } else if (dev->nic_info.dpp_unmapped_len && @@ -315,12 +318,13 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr + dev->nic_info.dpp_unmapped_len)) && (len <= dev->nic_info.dpp_unmapped_len)) { - /* dpp area mapping */ + if (vma->vm_flags & VM_READ) + return -EPERM; + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, vma->vm_page_prot); } else { - /* queue memory mapping */ status = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, vma->vm_page_prot); } @@ -351,9 +355,9 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, if (pd->dpp_enabled) { dpp_page_addr = dev->nic_info.dpp_unmapped_addr + - (pd->id * OCRDMA_DPP_PAGE_SIZE); + (pd->id * PAGE_SIZE); status = ocrdma_add_mmap(uctx, dpp_page_addr, - OCRDMA_DPP_PAGE_SIZE); + PAGE_SIZE); if (status) goto dpp_map_err; rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr); @@ -369,7 +373,7 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, ucopy_err: if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE); + ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); dpp_map_err: ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); return status; @@ -392,10 +396,18 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, pd->num_dpp_qp = pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; } +retry: status = ocrdma_mbx_alloc_pd(dev, pd); if (status) { - kfree(pd); - return ERR_PTR(status); + /* try for pd with out dpp */ + if (pd->dpp_enabled) { + pd->dpp_enabled = false; + pd->num_dpp_qp = 0; + goto retry; + } else { + kfree(pd); + return ERR_PTR(status); + } } if (udata && context) { @@ -421,9 +433,9 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) status = ocrdma_mbx_dealloc_pd(dev, pd); if (pd->uctx) { u64 dpp_db = dev->nic_info.dpp_unmapped_addr + - (pd->id * OCRDMA_DPP_PAGE_SIZE); + (pd->id * PAGE_SIZE); if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE); + ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE); usr_db = dev->nic_info.unmapped_db + (pd->id * dev->nic_info.db_page_size); ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); @@ -693,7 +705,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, memset(&uresp, 0, sizeof(uresp)); uresp.cq_id = cq->id; - uresp.page_size = cq->len; + uresp.page_size = PAGE_ALIGN(cq->len); uresp.num_pages = 1; uresp.max_hw_cqe = cq->max_hw_cqe; uresp.page_addr[0] = cq->pa; @@ -788,7 +800,8 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { - ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len); + ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, + PAGE_ALIGN(cq->len)); ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db, dev->nic_info.db_page_size); } @@ -817,14 +830,17 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, struct ib_qp_init_attr *attrs) { - if (attrs->qp_type != IB_QPT_GSI && - attrs->qp_type != IB_QPT_RC && - attrs->qp_type != IB_QPT_UD) { + if ((attrs->qp_type != IB_QPT_GSI) && + (attrs->qp_type != IB_QPT_RC) && + (attrs->qp_type != IB_QPT_UC) && + (attrs->qp_type != IB_QPT_UD)) { pr_err("%s(%d) unsupported qp type=0x%x requested\n", __func__, dev->id, attrs->qp_type); return -EINVAL; } - if (attrs->cap.max_send_wr > dev->attr.max_wqe) { + /* Skip the check for QP1 to support CM size of 128 */ + if ((attrs->qp_type != IB_QPT_GSI) && + (attrs->cap.max_send_wr > dev->attr.max_wqe)) { pr_err("%s(%d) unsupported send_wr=0x%x requested\n", __func__, dev->id, attrs->cap.max_send_wr); pr_err("%s(%d) supported send_wr=0x%x\n", @@ -875,11 +891,9 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, /* verify consumer QPs are not trying to use GSI QP's CQ */ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) { if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) || - (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || - (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || - (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { + (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n", - __func__, dev->id); + __func__, dev->id); return -EINVAL; } } @@ -902,13 +916,13 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, uresp.qp_id = qp->id; uresp.sq_dbid = qp->sq.dbid; uresp.num_sq_pages = 1; - uresp.sq_page_size = qp->sq.len; + uresp.sq_page_size = PAGE_ALIGN(qp->sq.len); uresp.sq_page_addr[0] = qp->sq.pa; uresp.num_wqe_allocated = qp->sq.max_cnt; if (!srq) { uresp.rq_dbid = qp->rq.dbid; uresp.num_rq_pages = 1; - uresp.rq_page_size = qp->rq.len; + uresp.rq_page_size = PAGE_ALIGN(qp->rq.len); uresp.rq_page_addr[0] = qp->rq.pa; uresp.num_rqe_allocated = qp->rq.max_cnt; } @@ -1043,6 +1057,9 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, } qp->dev = dev; ocrdma_set_qp_init_params(qp, pd, attrs); + if (udata == NULL) + qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 | + OCRDMA_QP_FAST_REG); mutex_lock(&dev->dev_lock); status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq, @@ -1053,8 +1070,6 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, /* user space QP's wr_id table are managed in library */ if (udata == NULL) { - qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 | - OCRDMA_QP_FAST_REG); status = ocrdma_alloc_wr_id_tbl(qp); if (status) goto map_err; @@ -1290,22 +1305,17 @@ static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx) static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q) { int free_cnt; - if (q->head >= q->tail) - free_cnt = (q->max_cnt - q->head) + q->tail; - else - free_cnt = q->tail - q->head; - return free_cnt; + return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt; } static int is_hw_sq_empty(struct ocrdma_qp *qp) { - return (qp->sq.tail == qp->sq.head && - ocrdma_hwq_free_cnt(&qp->sq) ? 1 : 0); + return (qp->sq.tail == qp->sq.head); } static int is_hw_rq_empty(struct ocrdma_qp *qp) { - return (qp->rq.tail == qp->rq.head) ? 1 : 0; + return (qp->rq.tail == qp->rq.head); } static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q) @@ -1456,9 +1466,11 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) mutex_unlock(&dev->dev_lock); if (pd->uctx) { - ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len); + ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, + PAGE_ALIGN(qp->sq.len)); if (!qp->srq) - ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len); + ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, + PAGE_ALIGN(qp->rq.len)); } ocrdma_del_flush_qp(qp); @@ -1603,7 +1615,8 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) status = ocrdma_mbx_destroy_srq(dev, srq); if (srq->pd->uctx) - ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len); + ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, + PAGE_ALIGN(srq->rq.len)); kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); @@ -1650,7 +1663,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, struct ocrdma_sge *sge, struct ib_send_wr *wr, u32 wqe_size) { - if (wr->send_flags & IB_SEND_INLINE) { + if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) { if (wr->sg_list[0].length > qp->max_inline_data) { pr_err("%s() supported_len=0x%x,\n" " unspported len req=0x%x\n", __func__, @@ -1662,6 +1675,8 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, wr->sg_list[0].length); hdr->total_len = wr->sg_list[0].length; wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES); + if (0 == wr->sg_list[0].length) + wqe_size += sizeof(struct ocrdma_sge); hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT); } else { ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list); @@ -2208,7 +2223,8 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp, ocrdma_update_wc(qp, ibwc, tail); *polled = true; } - wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK; + wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) & + OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx; if (tail != wqe_idx) expand = true; /* Coalesced CQE can't be consumed yet */ @@ -2257,7 +2273,8 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc, u32 wqe_idx; srq = get_ocrdma_srq(qp->ibqp.srq); - wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT; + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx]; spin_lock_irqsave(&srq->q_lock, flags); ocrdma_srq_toggle_bit(srq, wqe_idx); -- cgit v1.2.3-70-g09d2 From 7c33880c3cb2cda816d4d64852c6a81018b9bc1f Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:39 +0530 Subject: RDMA/ocrdma: Add support for fast register work requests (FRWR) Also get the max_srq value from query_config mailbox response. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 + drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 4 + drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 19 ++++ drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 162 +++++++++++++++++++++++++++- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 5 + 6 files changed, 192 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index b4511668514..634c2e18521 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -56,6 +56,7 @@ struct ocrdma_dev_attr { u16 max_qp; u16 max_wqe; u16 max_rqe; + u16 max_srq; u32 max_inline_data; int max_send_sge; int max_recv_sge; @@ -169,6 +170,7 @@ struct ocrdma_dev { struct list_head entry; struct rcu_head rcu; int id; + u64 stag_arr[OCRDMA_MAX_STAG]; }; struct ocrdma_cq { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 31fd3ff1d1d..af3c5f564d6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -999,6 +999,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; + attr->max_srq = + (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index ded416f1ade..4eeea56f7b3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -329,6 +329,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.dereg_mr = ocrdma_dereg_mr; dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; + dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr; + dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list; + dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list; + /* mandatory to support user space verbs consumer. */ dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 0184009060d..6cf5a96f38c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -91,6 +91,7 @@ enum { #define OCRDMA_MAX_QP 2048 #define OCRDMA_MAX_CQ 2048 +#define OCRDMA_MAX_STAG 2048 enum { OCRDMA_DB_RQ_OFFSET = 0xE0, @@ -1564,6 +1565,7 @@ enum OCRDMA_WQE_OPCODE { OCRDMA_SEND = 0x00, OCRDMA_CMP_SWP = 0x14, OCRDMA_BIND_MW = 0x10, + OCRDMA_FR_MR = 0x11, OCRDMA_RESV1 = 0x0A, OCRDMA_LKEY_INV = 0x15, OCRDMA_FETCH_ADD = 0x13, @@ -1610,6 +1612,23 @@ struct ocrdma_ewqe_ud_hdr { u32 rsvd; }; +#define OCRDMA_MAX_FR_PBES 11 +struct ocrdma_fr_pbe { + u32 pa_hi; + u32 pa_lo; +}; + +/* extended wqe followed by hdr_wqe for Fast Memory register */ +struct ocrdma_ewqe_fr { + u32 va_hi; + u32 va_lo; + u32 fbo_hi; + u32 fbo_lo; + u32 size_sge; + u32 num_sges; + struct ocrdma_fr_pbe pbe[0]; +}; + struct ocrdma_eth_basic { u8 dmac[6]; u8 smac[6]; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 278b33b628e..ffa5511baf3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -96,7 +96,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) attr->max_qp_rd_atom = min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp; - attr->max_srq = (dev->attr.max_qp - 1); + attr->max_srq = dev->attr.max_srq; attr->max_srq_sge = dev->attr.max_srq_sge; attr->max_srq_wr = dev->attr.max_rqe; attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay; @@ -1304,7 +1304,6 @@ static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx) static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q) { - int free_cnt; return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt; } @@ -1746,6 +1745,96 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, ext_rw->len = hdr->total_len; } +static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl, + struct ocrdma_hw_mr *hwmr) +{ + int i; + u64 buf_addr = 0; + int num_pbes; + struct ocrdma_pbe *pbe; + + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + + /* go through the OS phy regions & fill hw pbe entries into pbls. */ + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + /* number of pbes can be more for one OS buf, when + * buffers are of different sizes. + * split the ib_buf to one or more pbes. + */ + buf_addr = wr->wr.fast_reg.page_list->page_list[i]; + pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); + num_pbes += 1; + pbe++; + + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + } + } + return; +} + +static int get_encoded_page_size(int pg_sz) +{ + /* Max size is 256M 4096 << 16 */ + int i = 0; + for (; i < 17; i++) + if (pg_sz == (4096 << i)) + break; + return i; +} + + +static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ib_send_wr *wr) +{ + u64 fbo; + struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); + struct ocrdma_mr *mr; + u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); + + wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); + + if ((wr->wr.fast_reg.page_list_len > + qp->dev->attr.max_pages_per_frmr) || + (wr->wr.fast_reg.length > 0xffffffffULL)) + return -EINVAL; + + hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + + if (wr->wr.fast_reg.page_list_len == 0) + BUG(); + if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; + if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; + if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; + hdr->lkey = wr->wr.fast_reg.rkey; + hdr->total_len = wr->wr.fast_reg.length; + + fbo = wr->wr.fast_reg.iova_start - + (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK); + + fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start); + fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff); + fast_reg->fbo_hi = upper_32_bits(fbo); + fast_reg->fbo_lo = (u32) fbo & 0xffffffff; + fast_reg->num_sges = wr->wr.fast_reg.page_list_len; + fast_reg->size_sge = + get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); + mr = (struct ocrdma_mr *)qp->dev->stag_arr[(hdr->lkey >> 8) & + (OCRDMA_MAX_STAG - 1)]; + build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); + return 0; +} + static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) { u32 val = qp->sq.dbid | (1 << 16); @@ -1815,10 +1904,14 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_LOCAL_INV: hdr->cw |= (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT); - hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) / + hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) + + sizeof(struct ocrdma_sge)) / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT; hdr->lkey = wr->ex.invalidate_rkey; break; + case IB_WR_FAST_REG_MR: + status = ocrdma_build_fr(qp, hdr, wr); + break; default: status = -EINVAL; break; @@ -2085,6 +2178,9 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, case OCRDMA_SEND: ibwc->opcode = IB_WC_SEND; break; + case OCRDMA_FR_MR: + ibwc->opcode = IB_WC_FAST_REG_MR; + break; case OCRDMA_LKEY_INV: ibwc->opcode = IB_WC_LOCAL_INV; break; @@ -2530,3 +2626,63 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) spin_unlock_irqrestore(&cq->cq_lock, flags); return 0; } + +struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) +{ + int status; + struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + + if (max_page_list_len > dev->attr.max_pages_per_frmr) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + status = ocrdma_get_pbl_info(dev, mr, max_page_list_len); + if (status) + goto pbl_err; + mr->hwmr.fr_mr = 1; + mr->hwmr.remote_rd = 0; + mr->hwmr.remote_wr = 0; + mr->hwmr.local_rd = 0; + mr->hwmr.local_wr = 0; + mr->hwmr.mw_bind = 0; + status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); + if (status) + goto pbl_err; + status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0); + if (status) + goto mbx_err; + mr->ibmr.rkey = mr->hwmr.lkey; + mr->ibmr.lkey = mr->hwmr.lkey; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (u64) mr; + return &mr->ibmr; +mbx_err: + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); +pbl_err: + kfree(mr); + return ERR_PTR(-ENOMEM); +} + +struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device + *ibdev, + int page_list_len) +{ + struct ib_fast_reg_page_list *frmr_list; + int size; + + size = sizeof(*frmr_list) + (page_list_len * sizeof(u64)); + frmr_list = kzalloc(size, GFP_KERNEL); + if (!frmr_list) + return ERR_PTR(-ENOMEM); + frmr_list->page_list = (u64 *)(frmr_list + 1); + return frmr_list; +} + +void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) +{ + kfree(page_list); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 633f03d8027..7f3056731af 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -89,5 +89,10 @@ struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *, int num_phys_buf, int acc, u64 *iova_start); struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, u64 virt, int acc, struct ib_udata *); +struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *pd, int max_page_list_len); +struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device + *ibdev, + int page_list_len); +void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list); #endif /* __OCRDMA_VERBS_H__ */ -- cgit v1.2.3-70-g09d2 From d3cb6c0b2a0d9f507fff8d7c74b2b334d6751bee Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:40 +0530 Subject: RDMA/ocrdma: Remove the MTU check based on Ethernet MTU Also increase MAX AH to 512. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 6 ++---- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index af3c5f564d6..6a62b2372fb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2102,8 +2102,6 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, enum ib_qp_state old_qps) { int status = 0; - struct net_device *netdev = qp->dev->nic_info.netdev; - int eth_mtu = iboe_get_mtu(netdev->mtu); if (attr_mask & IB_QP_PKEY_INDEX) { cmd->params.path_mtu_pkey_indx |= (attrs->pkey_index & @@ -2140,8 +2138,8 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID; } if (attr_mask & IB_QP_PATH_MTU) { - if (ib_mtu_enum_to_int(eth_mtu) < - ib_mtu_enum_to_int(attrs->path_mtu)) { + if (attrs->path_mtu < IB_MTU_256 || + attrs->path_mtu > IB_MTU_4096) { status = -EINVAL; goto pmtu_err; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index ffa5511baf3..7698572b79f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -75,7 +75,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) attr->vendor_part_id = dev->nic_info.pdev->device; attr->hw_ver = 0; attr->max_qp = dev->attr.max_qp; - attr->max_ah = dev->attr.max_qp; + attr->max_ah = OCRDMA_MAX_AH; attr->max_qp_wr = dev->attr.max_wqe; attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | -- cgit v1.2.3-70-g09d2 From c88bd03ffccdb069fd9541bea347bdab8f4e7e6a Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:41 +0530 Subject: RDMA/ocrdma: Fix to work with even a single MSI-X vector There are cases like SRIOV where can get only one MSI-X vector allocated for RoCE. In that case we need to use the vector for both data plane and control plane. We need to use EQ create version V2. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 3 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 133 ++++++++----------------------- 2 files changed, 36 insertions(+), 100 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 634c2e18521..9cc966ab3b5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -132,8 +132,7 @@ struct ocrdma_dev { struct ocrdma_cq **cq_tbl; struct ocrdma_qp **qp_tbl; - struct ocrdma_eq meq; - struct ocrdma_eq *qp_eq_tbl; + struct ocrdma_eq *eq_tbl; int eq_cnt; u16 base_eqid; u16 max_eq; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6a62b2372fb..1b14ef811b3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -364,22 +364,6 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt, } } -static void ocrdma_assign_eq_vect_gen2(struct ocrdma_dev *dev, - struct ocrdma_eq *eq) -{ - /* assign vector and update vector id for next EQ */ - eq->vector = dev->nic_info.msix.start_vector; - dev->nic_info.msix.start_vector += 1; -} - -static void ocrdma_free_eq_vect_gen2(struct ocrdma_dev *dev) -{ - /* this assumes that EQs are freed in exactly reverse order - * as its allocation. - */ - dev->nic_info.msix.start_vector -= 1; -} - static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q, int queue_type) { @@ -420,11 +404,8 @@ static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) memset(cmd, 0, sizeof(*cmd)); ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_EQ, OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) - cmd->req.rsvd_version = 0; - else - cmd->req.rsvd_version = 2; + cmd->req.rsvd_version = 2; cmd->num_pages = 4; cmd->valid = OCRDMA_CREATE_EQ_VALID; cmd->cnt = 4 << OCRDMA_CREATE_EQ_CNT_SHIFT; @@ -435,12 +416,7 @@ static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) NULL); if (!status) { eq->q.id = rsp->vector_eqid & 0xffff; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - ocrdma_assign_eq_vect_gen2(dev, eq); - } else { - eq->vector = (rsp->vector_eqid >> 16) & 0xffff; - dev->nic_info.msix.start_vector += 1; - } + eq->vector = (rsp->vector_eqid >> 16) & 0xffff; eq->q.created = true; } return status; @@ -483,8 +459,6 @@ static void _ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) { if (eq->q.created) { ocrdma_mbx_delete_q(dev, &eq->q, QTYPE_EQ); - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) - ocrdma_free_eq_vect_gen2(dev); ocrdma_free_q(dev, &eq->q); } } @@ -503,13 +477,12 @@ static void ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) _ocrdma_destroy_eq(dev, eq); } -static void ocrdma_destroy_qp_eqs(struct ocrdma_dev *dev) +static void ocrdma_destroy_eqs(struct ocrdma_dev *dev) { int i; - /* deallocate the data path eqs */ for (i = 0; i < dev->eq_cnt; i++) - ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]); + ocrdma_destroy_eq(dev, &dev->eq_tbl[i]); } static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev, @@ -598,7 +571,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev) if (status) goto alloc_err; - status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->meq.q); + status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q); if (status) goto mbx_cq_free; @@ -1304,19 +1277,19 @@ static u16 ocrdma_bind_eq(struct ocrdma_dev *dev) u16 eq_id; mutex_lock(&dev->dev_lock); - cq_cnt = dev->qp_eq_tbl[0].cq_cnt; - eq_id = dev->qp_eq_tbl[0].q.id; + cq_cnt = dev->eq_tbl[0].cq_cnt; + eq_id = dev->eq_tbl[0].q.id; /* find the EQ which is has the least number of * CQs associated with it. */ for (i = 0; i < dev->eq_cnt; i++) { - if (dev->qp_eq_tbl[i].cq_cnt < cq_cnt) { - cq_cnt = dev->qp_eq_tbl[i].cq_cnt; - eq_id = dev->qp_eq_tbl[i].q.id; + if (dev->eq_tbl[i].cq_cnt < cq_cnt) { + cq_cnt = dev->eq_tbl[i].cq_cnt; + eq_id = dev->eq_tbl[i].q.id; selected_eq = i; } } - dev->qp_eq_tbl[selected_eq].cq_cnt += 1; + dev->eq_tbl[selected_eq].cq_cnt += 1; mutex_unlock(&dev->dev_lock); return eq_id; } @@ -1327,9 +1300,9 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id) mutex_lock(&dev->dev_lock); for (i = 0; i < dev->eq_cnt; i++) { - if (dev->qp_eq_tbl[i].q.id != eq_id) + if (dev->eq_tbl[i].q.id != eq_id) continue; - dev->qp_eq_tbl[i].cq_cnt -= 1; + dev->eq_tbl[i].cq_cnt -= 1; break; } mutex_unlock(&dev->dev_lock); @@ -2434,38 +2407,7 @@ int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) return 0; } -static int ocrdma_create_mq_eq(struct ocrdma_dev *dev) -{ - int status; - int irq; - unsigned long flags = 0; - int num_eq = 0; - - if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) { - flags = IRQF_SHARED; - } else { - num_eq = dev->nic_info.msix.num_vectors - - dev->nic_info.msix.start_vector; - /* minimum two vectors/eq are required for rdma to work. - * one for control path and one for data path. - */ - if (num_eq < 2) - return -EBUSY; - } - - status = ocrdma_create_eq(dev, &dev->meq, OCRDMA_EQ_LEN); - if (status) - return status; - sprintf(dev->meq.irq_name, "ocrdma_mq%d", dev->id); - irq = ocrdma_get_irq(dev, &dev->meq); - status = request_irq(irq, ocrdma_irq_handler, flags, dev->meq.irq_name, - &dev->meq); - if (status) - _ocrdma_destroy_eq(dev, &dev->meq); - return status; -} - -static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev) +static int ocrdma_create_eqs(struct ocrdma_dev *dev) { int num_eq, i, status = 0; int irq; @@ -2480,46 +2422,43 @@ static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev) num_eq = min_t(u32, num_eq, num_online_cpus()); } - dev->qp_eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL); - if (!dev->qp_eq_tbl) + if (!num_eq) + return -EINVAL; + + dev->eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL); + if (!dev->eq_tbl) return -ENOMEM; for (i = 0; i < num_eq; i++) { - status = ocrdma_create_eq(dev, &dev->qp_eq_tbl[i], + status = ocrdma_create_eq(dev, &dev->eq_tbl[i], OCRDMA_EQ_LEN); if (status) { status = -EINVAL; break; } - sprintf(dev->qp_eq_tbl[i].irq_name, "ocrdma_qp%d-%d", + sprintf(dev->eq_tbl[i].irq_name, "ocrdma%d-%d", dev->id, i); - irq = ocrdma_get_irq(dev, &dev->qp_eq_tbl[i]); + irq = ocrdma_get_irq(dev, &dev->eq_tbl[i]); status = request_irq(irq, ocrdma_irq_handler, flags, - dev->qp_eq_tbl[i].irq_name, - &dev->qp_eq_tbl[i]); - if (status) { - _ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]); - status = -EINVAL; - break; - } + dev->eq_tbl[i].irq_name, + &dev->eq_tbl[i]); + if (status) + goto done; dev->eq_cnt += 1; } /* one eq is sufficient for data path to work */ - if (dev->eq_cnt >= 1) - return 0; - ocrdma_destroy_qp_eqs(dev); + return 0; +done: + ocrdma_destroy_eqs(dev); return status; } int ocrdma_init_hw(struct ocrdma_dev *dev) { int status; - /* set up control path eq */ - status = ocrdma_create_mq_eq(dev); - if (status) - return status; - /* set up data path eq */ - status = ocrdma_create_qp_eqs(dev); + + /* create the eqs */ + status = ocrdma_create_eqs(dev); if (status) goto qpeq_err; status = ocrdma_create_mq(dev); @@ -2542,9 +2481,8 @@ int ocrdma_init_hw(struct ocrdma_dev *dev) conf_err: ocrdma_destroy_mq(dev); mq_err: - ocrdma_destroy_qp_eqs(dev); + ocrdma_destroy_eqs(dev); qpeq_err: - ocrdma_destroy_eq(dev, &dev->meq); pr_err("%s() status=%d\n", __func__, status); return status; } @@ -2553,10 +2491,9 @@ void ocrdma_cleanup_hw(struct ocrdma_dev *dev) { ocrdma_mbx_delete_ah_tbl(dev); - /* cleanup the data path eqs */ - ocrdma_destroy_qp_eqs(dev); + /* cleanup the eqs */ + ocrdma_destroy_eqs(dev); /* cleanup the control path */ ocrdma_destroy_mq(dev); - ocrdma_destroy_eq(dev, &dev->meq); } -- cgit v1.2.3-70-g09d2 From f11220ee69f72cf08479f28fd494264ac6a9349b Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:42 +0530 Subject: RDMA/ocrdma: For ERX2 irrespective of Qid, num_posted offset is 24 1) All RQ doorbells are handled by ERX2 and doorbell->num_posted offset is constant to bit offset 24 for ERX2 irrspective of Q id. 2) Fixed RESET to INIT state change (from ERR->RST->INIT->RTR case). Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 21 +++++++++++++++++---- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 5 ++--- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 12 +++++------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 1 + 4 files changed, 25 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 1b14ef811b3..2c2991b7dae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -256,11 +256,11 @@ static int ocrdma_get_mbx_cqe_errno(u16 cqe_status) break; case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES: case OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING: - err_num = -EAGAIN; + err_num = -EINVAL; break; case OCRDMA_MBX_CQE_STATUS_DMA_FAILED: default: - err_num = -EIO; + err_num = -EINVAL; break; } return err_num; @@ -1654,6 +1654,14 @@ void ocrdma_flush_qp(struct ocrdma_qp *qp) spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags); } +static void ocrdma_init_hwq_ptr(struct ocrdma_qp *qp) +{ + qp->sq.head = 0; + qp->sq.tail = 0; + qp->rq.head = 0; + qp->rq.tail = 0; +} + int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, enum ib_qp_state *old_ib_state) { @@ -1673,8 +1681,12 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state, } - if (new_state == OCRDMA_QPS_ERR) + if (new_state == OCRDMA_QPS_INIT) { + ocrdma_init_hwq_ptr(qp); + ocrdma_del_flush_qp(qp); + } else if (new_state == OCRDMA_QPS_ERR) { ocrdma_flush_qp(qp); + } qp->state = new_state; @@ -2317,7 +2329,8 @@ int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) { int status = -ENOMEM; struct ocrdma_modify_srq *cmd; - struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device); + struct ocrdma_pd *pd = srq->pd; + struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd)); if (!cmd) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6cf5a96f38c..bfd0acb5659 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -95,12 +95,11 @@ enum { enum { OCRDMA_DB_RQ_OFFSET = 0xE0, - OCRDMA_DB_GEN2_RQ1_OFFSET = 0x100, - OCRDMA_DB_GEN2_RQ2_OFFSET = 0xC0, + OCRDMA_DB_GEN2_RQ_OFFSET = 0x100, OCRDMA_DB_SQ_OFFSET = 0x60, OCRDMA_DB_GEN2_SQ_OFFSET = 0x1C0, OCRDMA_DB_SRQ_OFFSET = OCRDMA_DB_RQ_OFFSET, - OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ1_OFFSET, + OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET, OCRDMA_DB_CQ_OFFSET = 0x120, OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET, OCRDMA_DB_MQ_OFFSET = 0x140 diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7698572b79f..9e1d8c6bde5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -930,9 +930,8 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, uresp.db_page_size = dev->nic_info.db_page_size; if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; - uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ? - OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET; - uresp.db_shift = (qp->id < 128) ? 24 : 16; + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; + uresp.db_shift = 24; } else { uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET; uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; @@ -975,8 +974,7 @@ static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp, OCRDMA_DB_GEN2_SQ_OFFSET; qp->rq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + - ((qp->id < 128) ? - OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET); + OCRDMA_DB_GEN2_RQ_OFFSET; } else { qp->sq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + @@ -1399,7 +1397,7 @@ skip_cqe: spin_unlock_irqrestore(&cq->cq_lock, cq_flags); } -static void ocrdma_del_flush_qp(struct ocrdma_qp *qp) +void ocrdma_del_flush_qp(struct ocrdma_qp *qp) { int found = false; unsigned long flags; @@ -1496,7 +1494,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, uresp.db_page_size = dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET; + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; uresp.db_shift = 24; } else { uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 7f3056731af..b8f7853fd36 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -72,6 +72,7 @@ int ocrdma_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int ocrdma_destroy_qp(struct ib_qp *); +void ocrdma_del_flush_qp(struct ocrdma_qp *qp); struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *, struct ib_udata *); -- cgit v1.2.3-70-g09d2 From 2b51a9b9eb6bf240d2592e10d2f8823dd1f5ee3e Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:43 +0530 Subject: RDMA/ocrdma: FRMA code cleanup 1) Fixed setting FR_MR bit for FRWR stag allocation 2) Access rights are passsed during FRWR stage and not during STAT allocation stage 3) FRWR WQE structure cleanup 4) Add QP level signaled bit. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 ++ drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 16 +++++++--------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 8 +++++--- 4 files changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 9cc966ab3b5..1c8ba4cefcb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -289,6 +289,7 @@ struct ocrdma_qp { u32 qkey; bool dpp_enabled; u8 *ird_q_va; + bool signaled; u16 db_cache; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 2c2991b7dae..16ce664dc46 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1500,6 +1500,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, return -ENOMEM; cmd->num_pbl_pdid = pdid | (hwmr->num_pbls << OCRDMA_REG_NSMR_NUM_PBL_SHIFT); + cmd->fr_mr = hwmr->fr_mr; cmd->flags_hpage_pbe_sz |= (hwmr->remote_wr << OCRDMA_REG_NSMR_REMOTE_WR_SHIFT); @@ -2049,6 +2050,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT); cmd->params.rnt_rc_sl_fl |= (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK); + cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT); cmd->params.hop_lmt_rq_psn |= (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT); cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index bfd0acb5659..d1a9fb72a4b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -143,8 +143,11 @@ enum { # 2: 16K Bytes # 3: 32K Bytes # 4: 64K Bytes +# 5: 128K Bytes +# 6: 256K Bytes +# 7: 512K Bytes */ -#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (5) +#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8) #define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES) #define MAX_OCRDMA_QP_PAGES (8) @@ -1274,7 +1277,7 @@ struct ocrdma_reg_nsmr { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr cmd; - u32 lrkey_key_index; + u32 fr_mr; u32 num_pbl_pdid; u32 flags_hpage_pbe_sz; u32 totlen_low; @@ -1611,12 +1614,6 @@ struct ocrdma_ewqe_ud_hdr { u32 rsvd; }; -#define OCRDMA_MAX_FR_PBES 11 -struct ocrdma_fr_pbe { - u32 pa_hi; - u32 pa_lo; -}; - /* extended wqe followed by hdr_wqe for Fast Memory register */ struct ocrdma_ewqe_fr { u32 va_hi; @@ -1625,7 +1622,8 @@ struct ocrdma_ewqe_fr { u32 fbo_lo; u32 size_sge; u32 num_sges; - struct ocrdma_fr_pbe pbe[0]; + u32 rsvd; + u32 rsvd2; }; struct ocrdma_eth_basic { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 9e1d8c6bde5..3e80f65f42a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -82,7 +82,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_LOCAL_DMA_LKEY; + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_MGT_EXTENSIONS; attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge); attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; @@ -1015,6 +1016,7 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->sq.max_sges = attrs->cap.max_send_sge; qp->rq.max_sges = attrs->cap.max_recv_sge; qp->state = OCRDMA_QPS_RST; + qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; } @@ -1864,7 +1866,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } hdr = ocrdma_hwq_head(&qp->sq); hdr->cw = 0; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled) hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT); if (wr->send_flags & IB_SEND_FENCE) hdr->cw |= @@ -1918,7 +1920,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, *bad_wr = wr; break; } - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled) qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1; else qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0; -- cgit v1.2.3-70-g09d2 From cffce99051b80c90630a9fff662a1b25e278069d Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:44 +0530 Subject: RDMA/ocrdma: Dont use PD 0 for userpace CQ DB Create_CQ verb doesn't provide a PD pointer. So, until now we are creating all (both userspace and kernel) CQ DB regions from PD0. This will result in mmapping PD0 to applications. A rogue userspace application can mess things up. Also more serious issues is even the be2net NIC uses PD0. This patch addresses this problem by: 1) Create a PD page for every userspace application when the alloc_ucontext is called. This will be destroyed in dealloc_ucontext. 2) All CQs for that context will use the PD allocated in ucontext. 3) The first create_PD call from application will result in returning the PD address from its ucontext (no new PD will be created). 4) For subsecquent create_pd calls from application, we create new PDs for the application. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 3 + drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 5 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 7 +- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 4 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 359 +++++++++++++++++++++++++--- 6 files changed, 339 insertions(+), 40 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 1c8ba4cefcb..fde8fb097a8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -324,6 +324,9 @@ struct ocrdma_ucontext { struct list_head mm_head; struct mutex mm_list_lock; /* protects list entries of mm type */ + struct ocrdma_pd *cntxt_pd; + int pd_in_use; + struct { u32 *va; dma_addr_t pa; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 16ce664dc46..618c2124e61 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1309,7 +1309,7 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id) } int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, - int entries, int dpp_cq) + int entries, int dpp_cq, u16 pd_id) { int status = -ENOMEM; int max_hw_cqe; struct pci_dev *pdev = dev->nic_info.pdev; @@ -1357,7 +1357,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; cq->eqn = ocrdma_bind_eq(dev); - cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2; + cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3; cqe_count = cq->len / cqe_size; if (cqe_count > 1024) { /* Set cnt to 3 to indicate more than 1024 cq entries */ @@ -1393,6 +1393,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cq->phase_change = true; } + cmd->cmd.pd_id = pd_id; /* valid only for v3 */ ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (status) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index cc90ac3b6d4..044db74e780 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -78,6 +78,11 @@ static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len) #endif } +static inline u64 ocrdma_get_db_addr(struct ocrdma_dev *dev, u32 pdid) +{ + return dev->nic_info.unmapped_db + (pdid * dev->nic_info.db_page_size); +} + int ocrdma_init_hw(struct ocrdma_dev *); void ocrdma_cleanup_hw(struct ocrdma_dev *); @@ -100,7 +105,7 @@ int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey); int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr, u32 pd_id, int acc); int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *, - int entries, int dpp_cq); + int entries, int dpp_cq, u16 pd_id); int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *); int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 4eeea56f7b3..7d43ba924bf 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -326,6 +326,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.req_notify_cq = ocrdma_arm_cq; dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; + dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr; dev->ibdev.dereg_mr = ocrdma_dereg_mr; dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index d1a9fb72a4b..1e2992fee4b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -547,6 +547,7 @@ enum { enum { OCRDMA_CREATE_CQ_VER2 = 2, + OCRDMA_CREATE_CQ_VER3 = 3, OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, @@ -580,7 +581,8 @@ struct ocrdma_create_cq_cmd { u32 pgsz_pgcnt; u32 ev_cnt_flags; u32 eqn; - u32 cqe_count; + u16 cqe_count; + u16 pd_id; u32 rsvd6; struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 3e80f65f42a..e554fc258a6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -215,6 +215,108 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, return found; } +static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) +{ + struct ocrdma_pd *pd = NULL; + int status = 0; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + if (udata && uctx) { + pd->dpp_enabled = + dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY; + pd->num_dpp_qp = + pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; + } + +retry: + status = ocrdma_mbx_alloc_pd(dev, pd); + if (status) { + if (pd->dpp_enabled) { + pd->dpp_enabled = false; + pd->num_dpp_qp = 0; + goto retry; + } else { + kfree(pd); + return ERR_PTR(status); + } + } + + return pd; +} + +static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, + struct ocrdma_pd *pd) +{ + return (uctx->cntxt_pd == pd ? true : false); +} + +static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, + struct ocrdma_pd *pd) +{ + int status = 0; + + status = ocrdma_mbx_dealloc_pd(dev, pd); + kfree(pd); + return status; +} + +static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) +{ + int status = 0; + + uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata); + if (IS_ERR(uctx->cntxt_pd)) { + status = PTR_ERR(uctx->cntxt_pd); + uctx->cntxt_pd = NULL; + goto err; + } + + uctx->cntxt_pd->uctx = uctx; + uctx->cntxt_pd->ibpd.device = &dev->ibdev; +err: + return status; +} + +static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + int status = 0; + struct ocrdma_pd *pd = uctx->cntxt_pd; + struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); + + BUG_ON(uctx->pd_in_use); + uctx->cntxt_pd = NULL; + status = _ocrdma_dealloc_pd(dev, pd); + return status; +} + +static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + struct ocrdma_pd *pd = NULL; + + mutex_lock(&uctx->mm_list_lock); + if (!uctx->pd_in_use) { + uctx->pd_in_use = true; + pd = uctx->cntxt_pd; + } + mutex_unlock(&uctx->mm_list_lock); + + return pd; +} + +static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + mutex_lock(&uctx->mm_list_lock); + uctx->pd_in_use = false; + mutex_unlock(&uctx->mm_list_lock); +} + struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { @@ -249,6 +351,11 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); if (status) goto map_err; + + status = ocrdma_alloc_ucontext_pd(dev, ctx, udata); + if (status) + goto pd_err; + resp.dev_id = dev->id; resp.max_inline_data = dev->attr.max_inline_data; resp.wqe_size = dev->attr.wqe_size; @@ -262,6 +369,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, return &ctx->ibucontext; cpy_err: +pd_err: ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); map_err: dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va, @@ -272,11 +380,14 @@ map_err: int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) { + int status = 0; struct ocrdma_mm *mm, *tmp; struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); struct pci_dev *pdev = dev->nic_info.pdev; + status = ocrdma_dealloc_ucontext_pd(uctx); + ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, uctx->ah_tbl.pa); @@ -286,7 +397,7 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) kfree(mm); } kfree(uctx); - return 0; + return status; } int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) @@ -346,8 +457,7 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, memset(&rsp, 0, sizeof(rsp)); rsp.id = pd->id; rsp.dpp_enabled = pd->dpp_enabled; - db_page_addr = dev->nic_info.unmapped_db + - (pd->id * dev->nic_info.db_page_size); + db_page_addr = ocrdma_get_db_addr(dev, pd->id); db_page_size = dev->nic_info.db_page_size; status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size); @@ -386,31 +496,26 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, { struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_pd *pd; + struct ocrdma_ucontext *uctx = NULL; int status; + u8 is_uctx_pd = false; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); if (udata && context) { - pd->dpp_enabled = - (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY); - pd->num_dpp_qp = - pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; - } -retry: - status = ocrdma_mbx_alloc_pd(dev, pd); - if (status) { - /* try for pd with out dpp */ - if (pd->dpp_enabled) { - pd->dpp_enabled = false; - pd->num_dpp_qp = 0; - goto retry; - } else { - kfree(pd); - return ERR_PTR(status); + uctx = get_ocrdma_ucontext(context); + pd = ocrdma_get_ucontext_pd(uctx); + if (pd) { + is_uctx_pd = true; + goto pd_mapping; } } + pd = _ocrdma_alloc_pd(dev, uctx, udata); + if (IS_ERR(pd)) { + status = PTR_ERR(pd); + goto exit; + } + +pd_mapping: if (udata && context) { status = ocrdma_copy_pd_uresp(dev, pd, context, udata); if (status) @@ -419,8 +524,13 @@ retry: return &pd->ibpd; err: - status = ocrdma_mbx_dealloc_pd(dev, pd); - kfree(pd); + if (is_uctx_pd) { + ocrdma_release_ucontext_pd(uctx); + } else { + status = ocrdma_mbx_dealloc_pd(dev, pd); + kfree(pd); + } +exit: return ERR_PTR(status); } @@ -428,20 +538,25 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - int status; + struct ocrdma_ucontext *uctx = NULL; + int status = 0; u64 usr_db; - status = ocrdma_mbx_dealloc_pd(dev, pd); - if (pd->uctx) { + uctx = pd->uctx; + if (uctx) { u64 dpp_db = dev->nic_info.dpp_unmapped_addr + - (pd->id * PAGE_SIZE); + (pd->id * PAGE_SIZE); if (pd->dpp_enabled) ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE); - usr_db = dev->nic_info.unmapped_db + - (pd->id * dev->nic_info.db_page_size); + usr_db = ocrdma_get_db_addr(dev, pd->id); ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); + + if (is_ucontext_pd(uctx, pd)) { + ocrdma_release_ucontext_pd(uctx); + return status; + } } - kfree(pd); + status = _ocrdma_dealloc_pd(dev, pd); return status; } @@ -701,7 +816,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, struct ib_ucontext *ib_ctx) { int status; - struct ocrdma_ucontext *uctx; + struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); struct ocrdma_create_cq_uresp uresp; memset(&uresp, 0, sizeof(uresp)); @@ -710,7 +825,7 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, uresp.num_pages = 1; uresp.max_hw_cqe = cq->max_hw_cqe; uresp.page_addr[0] = cq->pa; - uresp.db_page_addr = dev->nic_info.unmapped_db; + uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id); uresp.db_page_size = dev->nic_info.db_page_size; uresp.phase_change = cq->phase_change ? 1 : 0; status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); @@ -719,7 +834,6 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, __func__, dev->id, cq->id); goto err; } - uctx = get_ocrdma_ucontext(ib_ctx); status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size); if (status) goto err; @@ -739,6 +853,8 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, { struct ocrdma_cq *cq; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + struct ocrdma_ucontext *uctx = NULL; + u16 pd_id = 0; int status; struct ocrdma_create_cq_ureq ureq; @@ -756,7 +872,12 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); - status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq); + if (ib_ctx) { + uctx = get_ocrdma_ucontext(ib_ctx); + pd_id = uctx->cntxt_pd->id; + } + + status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id); if (status) { kfree(cq); return ERR_PTR(status); @@ -797,13 +918,16 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) int status; struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); + int pdid = 0; status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { + pdid = cq->ucontext->cntxt_pd->id; ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, PAGE_ALIGN(cq->len)); - ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db, + ocrdma_del_mmap(cq->ucontext, + ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } dev->cq_tbl[cq->id] = NULL; @@ -2686,3 +2810,166 @@ void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) { kfree(page_list); } + +#define MAX_KERNEL_PBE_SIZE 65536 +static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, + int buf_cnt, u32 *pbe_size) +{ + u64 total_size = 0; + u64 buf_size = 0; + int i; + *pbe_size = roundup(buf_list[0].size, PAGE_SIZE); + *pbe_size = roundup_pow_of_two(*pbe_size); + + /* find the smallest PBE size that we can have */ + for (i = 0; i < buf_cnt; i++) { + /* first addr may not be page aligned, so ignore checking */ + if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) || + (buf_list[i].size & ~PAGE_MASK))) { + return 0; + } + + /* if configured PBE size is greater then the chosen one, + * reduce the PBE size. + */ + buf_size = roundup(buf_list[i].size, PAGE_SIZE); + /* pbe_size has to be even multiple of 4K 1,2,4,8...*/ + buf_size = roundup_pow_of_two(buf_size); + if (*pbe_size > buf_size) + *pbe_size = buf_size; + + total_size += buf_size; + } + *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ? + (MAX_KERNEL_PBE_SIZE) : (*pbe_size); + + /* num_pbes = total_size / (*pbe_size); this is implemented below. */ + + return total_size >> ilog2(*pbe_size); +} + +static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt, + u32 pbe_size, struct ocrdma_pbl *pbl_tbl, + struct ocrdma_hw_mr *hwmr) +{ + int i; + int idx; + int pbes_per_buf = 0; + u64 buf_addr = 0; + int num_pbes; + struct ocrdma_pbe *pbe; + int total_num_pbes = 0; + + if (!hwmr->num_pbes) + return; + + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + + /* go through the OS phy regions & fill hw pbe entries into pbls. */ + for (i = 0; i < ib_buf_cnt; i++) { + buf_addr = buf_list[i].addr; + pbes_per_buf = + roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) / + pbe_size; + hwmr->len += buf_list[i].size; + /* number of pbes can be more for one OS buf, when + * buffers are of different sizes. + * split the ib_buf to one or more pbes. + */ + for (idx = 0; idx < pbes_per_buf; idx++) { + /* we program always page aligned addresses, + * first unaligned address is taken care by fbo. + */ + if (i == 0) { + /* for non zero fbo, assign the + * start of the page. + */ + pbe->pa_lo = + cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = + cpu_to_le32((u32) upper_32_bits(buf_addr)); + } else { + pbe->pa_lo = + cpu_to_le32((u32) (buf_addr & 0xffffffff)); + pbe->pa_hi = + cpu_to_le32((u32) upper_32_bits(buf_addr)); + } + buf_addr += pbe_size; + num_pbes += 1; + total_num_pbes += 1; + pbe++; + + if (total_num_pbes == hwmr->num_pbes) + goto mr_tbl_done; + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + } + } + } +mr_tbl_done: + return; +} + +struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd, + struct ib_phys_buf *buf_list, + int buf_cnt, int acc, u64 *iova_start) +{ + int status = -ENOMEM; + struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + u32 num_pbes; + u32 pbe_size = 0; + + if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE)) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(status); + + num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size); + if (num_pbes == 0) { + status = -EINVAL; + goto pbl_err; + } + status = ocrdma_get_pbl_info(dev, mr, num_pbes); + if (status) + goto pbl_err; + + mr->hwmr.pbe_size = pbe_size; + mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK); + mr->hwmr.va = *iova_start; + mr->hwmr.local_rd = 1; + mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0; + + status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); + if (status) + goto pbl_err; + build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table, + &mr->hwmr); + status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); + if (status) + goto mbx_err; + + mr->ibmr.lkey = mr->hwmr.lkey; + if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) + mr->ibmr.rkey = mr->hwmr.lkey; + return &mr->ibmr; + +mbx_err: + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); +pbl_err: + kfree(mr); + return ERR_PTR(status); +} -- cgit v1.2.3-70-g09d2 From c43e9ab84d853f499a2fd531362973c8e505b342 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:46 +0530 Subject: RDMA/ocrdma: Increase STAG array size 1) Increase STAG Array size. 2) Max inline data size should be set to the same value used during QP creation 3) Set max_sge_rd to zero since we dont support RD transport in our adapters. 4) Max cqes reported in ibv_devinfo should be from QUERY_CONFIG. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 4 +++- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 618c2124e61..3f027552a1a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -994,6 +994,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_num_mr_pbl = rsp->max_num_mr_pbl; attr->max_cqe = rsp->max_cq_cqes_per_cq & OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK; + attr->max_cq = (rsp->max_cq_cqes_per_cq & + OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET; attr->wqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs & OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET) * @@ -1026,7 +1029,6 @@ static int ocrdma_check_fw_config(struct ocrdma_dev *dev, return -EINVAL; dev->base_eqid = conf->base_eqid; dev->max_eq = conf->max_eq; - dev->attr.max_cq = OCRDMA_MAX_CQ - 1; return 0; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 1e2992fee4b..35c61080ae1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -91,7 +91,7 @@ enum { #define OCRDMA_MAX_QP 2048 #define OCRDMA_MAX_CQ 2048 -#define OCRDMA_MAX_STAG 2048 +#define OCRDMA_MAX_STAG 8192 enum { OCRDMA_DB_RQ_OFFSET = 0xE0, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e554fc258a6..e934073e705 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -85,7 +85,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge); - attr->max_sge_rd = dev->attr.max_rdma_sge; + attr->max_sge_rd = 0; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; @@ -1365,7 +1365,7 @@ int ocrdma_query_qp(struct ib_qp *ibqp, qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1; qp_attr->cap.max_send_sge = qp->sq.max_sges; qp_attr->cap.max_recv_sge = qp->rq.max_sges; - qp_attr->cap.max_inline_data = dev->attr.max_inline_data; + qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; memcpy(&qp_attr->ah_attr.grh.dgid, ¶ms.dgid[0], sizeof(params.dgid)); -- cgit v1.2.3-70-g09d2 From f24ceba6b6454f68f456981be2a337b6390d9aa0 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:47 +0530 Subject: RDMA/ocrdma: Fix for displaying proper link speed Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 28 +++++++++++++++++++ drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 27 ++++++++++++++++++ drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 43 +++++++++++++++++++++++++++-- 4 files changed, 97 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 3f027552a1a..ea3d7237596 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1101,6 +1101,34 @@ mbx_err: return status; } +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) +{ + int status = -ENOMEM; + struct ocrdma_get_link_speed_rsp *rsp; + struct ocrdma_mqe *cmd; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1, + sizeof(*cmd)); + if (!cmd) + return status; + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + + ((struct ocrdma_mbx_hdr *)cmd->u.cmd)->rsvd_version = 0x1; + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_link_speed_rsp *)cmd; + *lnk_speed = rsp->phys_port_speed; + +mbx_err: + kfree(cmd); + return status; +} + int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { int status = -ENOMEM; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 044db74e780..f2a89d4cc7c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -91,6 +91,7 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed, bool solicited, u16 cqe_popped); /* verbs specific mailbox commands */ +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed); int ocrdma_query_config(struct ocrdma_dev *, struct ocrdma_mbx_query_config *config); int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 35c61080ae1..9e975d88844 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -70,6 +70,7 @@ enum { #define OCRDMA_SUBSYS_COMMON 1 enum { + OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1 = 5, OCRDMA_CMD_CREATE_CQ = 12, OCRDMA_CMD_CREATE_EQ = 13, OCRDMA_CMD_CREATE_MQ = 21, @@ -545,6 +546,32 @@ enum { OCRDMA_FN_MODE_RDMA = 0x4 }; +struct ocrdma_get_link_speed_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u8 pt_port_num; + u8 link_duplex; + u8 phys_port_speed; + u8 phys_port_fault; + u16 rsvd1; + u16 qos_lnk_speed; + u8 logical_lnk_status; + u8 rsvd2[3]; +}; + +enum { + OCRDMA_PHYS_LINK_SPEED_ZERO = 0x0, + OCRDMA_PHYS_LINK_SPEED_10MBPS = 0x1, + OCRDMA_PHYS_LINK_SPEED_100MBPS = 0x2, + OCRDMA_PHYS_LINK_SPEED_1GBPS = 0x3, + OCRDMA_PHYS_LINK_SPEED_10GBPS = 0x4, + OCRDMA_PHYS_LINK_SPEED_20GBPS = 0x5, + OCRDMA_PHYS_LINK_SPEED_25GBPS = 0x6, + OCRDMA_PHYS_LINK_SPEED_40GBPS = 0x7, + OCRDMA_PHYS_LINK_SPEED_100GBPS = 0x8 +}; + enum { OCRDMA_CREATE_CQ_VER2 = 2, OCRDMA_CREATE_CQ_VER3 = 3, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e934073e705..f09a9403b60 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -106,6 +106,45 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) return 0; } +static inline void get_link_speed_and_width(struct ocrdma_dev *dev, + u8 *ib_speed, u8 *ib_width) +{ + int status; + u8 speed; + + status = ocrdma_mbx_get_link_speed(dev, &speed); + if (status) + speed = OCRDMA_PHYS_LINK_SPEED_ZERO; + + switch (speed) { + case OCRDMA_PHYS_LINK_SPEED_1GBPS: + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + break; + + case OCRDMA_PHYS_LINK_SPEED_10GBPS: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_1X; + break; + + case OCRDMA_PHYS_LINK_SPEED_20GBPS: + *ib_speed = IB_SPEED_DDR; + *ib_width = IB_WIDTH_4X; + break; + + case OCRDMA_PHYS_LINK_SPEED_40GBPS: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_4X; + break; + + default: + /* Unsupported */ + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + }; +} + + int ocrdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { @@ -142,8 +181,8 @@ int ocrdma_query_port(struct ib_device *ibdev, props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; props->qkey_viol_cntr = 0; - props->active_width = IB_WIDTH_1X; - props->active_speed = 4; + get_link_speed_and_width(dev, &props->active_speed, + &props->active_width); props->max_msg_sz = 0x80000000; props->max_vl_num = 4; return 0; -- cgit v1.2.3-70-g09d2 From 117e6dd1c5c96ef1edd6e5def4dd9937d98cae94 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:48 +0530 Subject: RDMA/ocrdma: Consider multiple SGES in case of DPP While posting inline DPP data, we are not considering multiple sges. Fix this. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 32 ++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index f09a9403b60..0504e7376f1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1820,24 +1820,42 @@ static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr, memset(sge, 0, sizeof(*sge)); } +static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge) +{ + uint32_t total_len = 0, i; + + for (i = 0; i < num_sge; i++) + total_len += sg_list[i].length; + return total_len; +} + + static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, struct ocrdma_sge *sge, struct ib_send_wr *wr, u32 wqe_size) { + int i; + char *dpp_addr; + if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) { - if (wr->sg_list[0].length > qp->max_inline_data) { + hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge); + if (unlikely(hdr->total_len > qp->max_inline_data)) { pr_err("%s() supported_len=0x%x,\n" " unspported len req=0x%x\n", __func__, - qp->max_inline_data, wr->sg_list[0].length); + qp->max_inline_data, hdr->total_len); return -EINVAL; } - memcpy(sge, - (void *)(unsigned long)wr->sg_list[0].addr, - wr->sg_list[0].length); - hdr->total_len = wr->sg_list[0].length; + dpp_addr = (char *)sge; + for (i = 0; i < wr->num_sge; i++) { + memcpy(dpp_addr, + (void *)(unsigned long)wr->sg_list[i].addr, + wr->sg_list[i].length); + dpp_addr += wr->sg_list[i].length; + } + wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES); - if (0 == wr->sg_list[0].length) + if (0 == hdr->total_len) wqe_size += sizeof(struct ocrdma_sge); hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT); } else { -- cgit v1.2.3-70-g09d2 From 38754397152e0e9ab0d2854064ef0ff4deabdd7e Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:49 +0530 Subject: RDMA/ocrdma: Add ABI versioning support Add ABI versioning support between driver and userspace library. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index e5ea9a9776a..fbac8eb4403 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -28,6 +28,7 @@ #ifndef __OCRDMA_ABI_H__ #define __OCRDMA_ABI_H__ +#define OCRDMA_ABI_VERSION 1 /* user kernel communication data structures. */ struct ocrdma_alloc_ucontext_resp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7d43ba924bf..56e004940f1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -39,6 +39,7 @@ #include "ocrdma_ah.h" #include "be_roce.h" #include "ocrdma_hw.h" +#include "ocrdma_abi.h" MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION); MODULE_DESCRIPTION("Emulex RoCE HCA Driver"); @@ -265,6 +266,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); dev->ibdev.owner = THIS_MODULE; + dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION; dev->ibdev.uverbs_cmd_mask = OCRDMA_UVERBS(GET_CONTEXT) | OCRDMA_UVERBS(QUERY_DEVICE) | -- cgit v1.2.3-70-g09d2 From 84b105db593e735b8304815c913f7eea222a0600 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:50 +0530 Subject: RDMA/ocrdma: Fill PVID in UMC case In UMC case, driver needs to fill PVID in the address vector template for UD traffic. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 2 ++ drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 32 +++++++++++++++++++++++++++++-- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 18 ++++++++++++++++- 4 files changed, 50 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index fde8fb097a8..adc11d14f87 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -170,6 +170,7 @@ struct ocrdma_dev { struct rcu_head rcu; int id; u64 stag_arr[OCRDMA_MAX_STAG]; + u16 pvid; }; struct ocrdma_cq { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index df9e73758af..ee499d94225 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -50,6 +50,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, ah->sgid_index = attr->grh.sgid_index; vlan_tag = rdma_get_vlan_id(&attr->grh.dgid); + if (!vlan_tag || (vlan_tag > 0xFFF)) + vlan_tag = dev->pvid; if (vlan_tag && (vlan_tag < 0x1000)) { eth.eth_type = cpu_to_be16(0x8100); eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ea3d7237596..acf5441857a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -544,7 +544,10 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, cmd->cqid_pages = num_pages; cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->async_event_bitmap = Bit(20); + + cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE); + cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE); + cmd->async_cqid_ringsize = cq->id; cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); @@ -727,6 +730,29 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, } +static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, + struct ocrdma_ae_mcqe *cqe) +{ + struct ocrdma_ae_pvid_mcqe *evt; + int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >> + OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT; + + switch (type) { + case OCRDMA_ASYNC_EVENT_PVID_STATE: + evt = (struct ocrdma_ae_pvid_mcqe *)cqe; + if ((evt->tag_enabled & OCRDMA_AE_PVID_MCQE_ENABLED_MASK) >> + OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT) + dev->pvid = ((evt->tag_enabled & + OCRDMA_AE_PVID_MCQE_TAG_MASK) >> + OCRDMA_AE_PVID_MCQE_TAG_SHIFT); + break; + default: + /* Not interested evts. */ + break; + } +} + + static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) { /* async CQE processing */ @@ -734,8 +760,10 @@ static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >> OCRDMA_AE_MCQE_EVENT_CODE_SHIFT; - if (evt_code == OCRDMA_ASYNC_EVE_CODE) + if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE) ocrdma_dispatch_ibevent(dev, cqe); + else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE) + ocrdma_process_grp5_aync(dev, cqe); else pr_err("%s(%d) invalid evt code=0x%x\n", __func__, dev->id, evt_code); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 9e975d88844..9f9570ec3c2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -338,6 +338,20 @@ struct ocrdma_ae_mcqe { u32 valid_ae_event; }; +enum { + OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT = 0, + OCRDMA_AE_PVID_MCQE_ENABLED_MASK = 0xFF, + OCRDMA_AE_PVID_MCQE_TAG_SHIFT = 16, + OCRDMA_AE_PVID_MCQE_TAG_MASK = 0xFFFF << OCRDMA_AE_PVID_MCQE_TAG_SHIFT +}; + +struct ocrdma_ae_pvid_mcqe { + u32 tag_enabled; + u32 event_tag; + u32 rsvd1; + u32 rsvd2; +}; + enum { OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT = 16, OCRDMA_AE_MPA_MCQE_REQ_ID_MASK = 0xFFFF << @@ -388,7 +402,9 @@ struct ocrdma_ae_qp_mcqe { u32 valid_ae_event; }; -#define OCRDMA_ASYNC_EVE_CODE 0x14 +#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 +#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 +#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3 enum OCRDMA_ASYNC_EVENT_TYPE { OCRDMA_CQ_ERROR = 0x00, -- cgit v1.2.3-70-g09d2 From d7e19c0ad9baa0cfe7ef8b69a182a7db1dee6b52 Mon Sep 17 00:00:00 2001 From: Naresh Gottumukkala Date: Mon, 26 Aug 2013 15:27:51 +0530 Subject: RDMA/ocrdma: Fix passing wrong opcode to modify_srq Fix passing wrong opcode to ocrdma_modify_srq and query SRQ. Signed-off-by: Naresh Gottumukkala Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index acf5441857a..4ed8235d2d3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2393,7 +2393,7 @@ int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) struct ocrdma_pd *pd = srq->pd; struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); - cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd)); + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_SRQ, sizeof(*cmd)); if (!cmd) return status; cmd->id = srq->id; @@ -2410,7 +2410,7 @@ int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) struct ocrdma_query_srq *cmd; struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device); - cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd)); + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_SRQ, sizeof(*cmd)); if (!cmd) return status; cmd->id = srq->rq.dbid; -- cgit v1.2.3-70-g09d2 From 0318f685213e04109d408ac04d8c0d11bb6b15f3 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 20 Aug 2013 08:25:08 -0700 Subject: IB/qib: Move COUNTER_MASK definition within qib_mad.h header guards Commit 36a8f01cd24b ("IB/qib: Add congestion control agent implementation") caused statements to leak pass the header guard. Fix this. Reviewed-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_mad.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h index 57bd3fa016b..28874f8606f 100644 --- a/drivers/infiniband/hw/qib/qib_mad.h +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -415,7 +415,6 @@ struct cc_table_shadow { struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX]; } __packed; -#endif /* _QIB_MAD_H */ /* * The PortSamplesControl.CounterMasks field is an array of 3 bit fields * which specify the N'th counter's capabilities. See ch. 16.1.3.2. @@ -428,3 +427,5 @@ struct cc_table_shadow { COUNTER_MASK(1, 2) | \ COUNTER_MASK(1, 3) | \ COUNTER_MASK(1, 4)) + +#endif /* _QIB_MAD_H */ -- cgit v1.2.3-70-g09d2 From 27ae2d1ea578dce73ab5368a6bf31c342004e709 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 19 Aug 2013 16:41:51 +0300 Subject: IB/iser: Fix possible memory leak in iser_create_frwr_pool() Fix leak where desc is not being freed in error flows. Signed-off-by: Roi Dayan Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 28badacb013..afe95674008 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -305,7 +305,7 @@ int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) if (IS_ERR(desc->data_frpl)) { ret = PTR_ERR(desc->data_frpl); iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); - goto err; + goto fast_reg_page_failure; } desc->data_mr = ib_alloc_fast_reg_mr(device->pd, @@ -313,8 +313,7 @@ int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) if (IS_ERR(desc->data_mr)) { ret = PTR_ERR(desc->data_mr); iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); - ib_free_fast_reg_page_list(desc->data_frpl); - goto err; + goto fast_reg_mr_failure; } desc->valid = true; list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); @@ -322,6 +321,11 @@ int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) } return 0; + +fast_reg_mr_failure: + ib_free_fast_reg_page_list(desc->data_frpl); +fast_reg_page_failure: + kfree(desc); err: iser_free_frwr_pool(ib_conn); return ret; -- cgit v1.2.3-70-g09d2 From 2e02d653febff23de614dc8978c64fbaa2767d85 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 15 Aug 2013 17:04:42 +0300 Subject: IB/iser: Fix redundant pointer check in dealloc flow This bug was discovered by Smatch static checker run by Dan Carpenter. If in free_rx_descriptors(), rx_descs are not NULL then the iser device is definately not NULL, so no need to check it before dereferencing it. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_initiator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index bdc38f423ca..5f01da99ad6 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -310,7 +310,7 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) if (!ib_conn->rx_descs) goto free_login_buf; - if (device && device->iser_free_rdma_reg_res) + if (device->iser_free_rdma_reg_res) device->iser_free_rdma_reg_res(ib_conn); rx_desc = ib_conn->rx_descs; -- cgit v1.2.3-70-g09d2 From 33ccbd858f3a8676d976f1a990c7c62e51551241 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 3 Sep 2013 09:00:08 -0700 Subject: RDMA/ocrdma: Fix compiler warning about int/pointer size mismatch Fix: drivers/infiniband/hw/ocrdma/ocrdma_verbs.c: In function 'ocrdma_build_fr': >> drivers/infiniband/hw/ocrdma/ocrdma_verbs.c:1832:7: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] mr = (struct ocrdma_mr *)qp->dev->stag_arr[(hdr->lkey >> 8) & ^ drivers/infiniband/hw/ocrdma/ocrdma_verbs.c: In function 'ocrdma_alloc_frmr': >> drivers/infiniband/hw/ocrdma/ocrdma_verbs.c:2661:64: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (u64) mr; Reported-by: kbuild test robot Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 0504e7376f1..6e982bb43c3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2010,7 +2010,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, fast_reg->num_sges = wr->wr.fast_reg.page_list_len; fast_reg->size_sge = get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *)qp->dev->stag_arr[(hdr->lkey >> 8) & + mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); return 0; @@ -2839,7 +2839,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) goto mbx_err; mr->ibmr.rkey = mr->hwmr.lkey; mr->ibmr.lkey = mr->hwmr.lkey; - dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (u64) mr; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr; return &mr->ibmr; mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); -- cgit v1.2.3-70-g09d2