diff options
61 files changed, 3487 insertions, 776 deletions
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3f62041222f..3591855cc5b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1,7 +1,7 @@ /******************************************************************************* * This file contains iSCSI extentions for RDMA (iSER) Verbs * - * (c) Copyright 2013 RisingTide Systems LLC. + * (c) Copyright 2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -39,7 +39,17 @@ static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_rx_wq; static struct workqueue_struct *isert_comp_wq; -static struct kmem_cache *isert_cmd_cache; + +static void +isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +static int +isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); +static void +isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +static int +isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -80,14 +90,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) { struct isert_device *device = isert_conn->conn_device; struct ib_qp_init_attr attr; - struct ib_device_attr devattr; int ret, index, min_index = 0; - memset(&devattr, 0, sizeof(struct ib_device_attr)); - ret = isert_query_device(cma_id->device, &devattr); - if (ret) - return ret; - mutex_lock(&device_list_mutex); for (index = 0; index < device->cqs_used; index++) if (device->cq_active_qps[index] < @@ -108,7 +112,7 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) * FIXME: Use devattr.max_sge - 2 for max_send_sge as * work-around for RDMA_READ.. */ - attr.cap.max_send_sge = devattr.max_sge - 2; + attr.cap.max_send_sge = device->dev_attr.max_sge - 2; isert_conn->max_sge = attr.cap.max_send_sge; attr.cap.max_recv_sge = 1; @@ -210,14 +214,31 @@ isert_create_device_ib_res(struct isert_device *device) { struct ib_device *ib_dev = device->ib_device; struct isert_cq_desc *cq_desc; + struct ib_device_attr *dev_attr; int ret = 0, i, j; + dev_attr = &device->dev_attr; + ret = isert_query_device(ib_dev, dev_attr); + if (ret) + return ret; + + /* asign function handlers */ + if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + device->use_frwr = 1; + device->reg_rdma_mem = isert_reg_rdma_frwr; + device->unreg_rdma_mem = isert_unreg_rdma_frwr; + } else { + device->use_frwr = 0; + device->reg_rdma_mem = isert_map_rdma; + device->unreg_rdma_mem = isert_unmap_cmd; + } + device->cqs_used = min_t(int, num_online_cpus(), device->ib_device->num_comp_vectors); device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used); - pr_debug("Using %d CQs, device %s supports %d vectors\n", + pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n", device->cqs_used, device->ib_device->name, - device->ib_device->num_comp_vectors); + device->ib_device->num_comp_vectors, device->use_frwr); device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) * device->cqs_used, GFP_KERNEL); if (!device->cq_desc) { @@ -363,6 +384,85 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id) return device; } +static void +isert_conn_free_frwr_pool(struct isert_conn *isert_conn) +{ + struct fast_reg_descriptor *fr_desc, *tmp; + int i = 0; + + if (list_empty(&isert_conn->conn_frwr_pool)) + return; + + pr_debug("Freeing conn %p frwr pool", isert_conn); + + list_for_each_entry_safe(fr_desc, tmp, + &isert_conn->conn_frwr_pool, list) { + list_del(&fr_desc->list); + ib_free_fast_reg_page_list(fr_desc->data_frpl); + ib_dereg_mr(fr_desc->data_mr); + kfree(fr_desc); + ++i; + } + + if (i < isert_conn->conn_frwr_pool_size) + pr_warn("Pool still has %d regions registered\n", + isert_conn->conn_frwr_pool_size - i); +} + +static int +isert_conn_create_frwr_pool(struct isert_conn *isert_conn) +{ + struct fast_reg_descriptor *fr_desc; + struct isert_device *device = isert_conn->conn_device; + int i, ret; + + INIT_LIST_HEAD(&isert_conn->conn_frwr_pool); + isert_conn->conn_frwr_pool_size = 0; + for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) { + fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); + if (!fr_desc) { + pr_err("Failed to allocate fast_reg descriptor\n"); + ret = -ENOMEM; + goto err; + } + + fr_desc->data_frpl = + ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_frpl)) { + pr_err("Failed to allocate fr_pg_list err=%ld\n", + PTR_ERR(fr_desc->data_frpl)); + ret = PTR_ERR(fr_desc->data_frpl); + goto err; + } + + fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_mr)) { + pr_err("Failed to allocate frmr err=%ld\n", + PTR_ERR(fr_desc->data_mr)); + ret = PTR_ERR(fr_desc->data_mr); + ib_free_fast_reg_page_list(fr_desc->data_frpl); + goto err; + } + pr_debug("Create fr_desc %p page_list %p\n", + fr_desc, fr_desc->data_frpl->page_list); + + fr_desc->valid = true; + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); + isert_conn->conn_frwr_pool_size++; + } + + pr_debug("Creating conn %p frwr pool size=%d", + isert_conn, isert_conn->conn_frwr_pool_size); + + return 0; + +err: + isert_conn_free_frwr_pool(isert_conn); + return ret; +} + static int isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { @@ -389,6 +489,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); + spin_lock_init(&isert_conn->conn_lock); cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; @@ -446,6 +547,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->conn_pd = device->dev_pd; isert_conn->conn_mr = device->dev_mr; + if (device->use_frwr) { + ret = isert_conn_create_frwr_pool(isert_conn); + if (ret) { + pr_err("Conn: %p failed to create frwr_pool\n", isert_conn); + goto out_frwr; + } + } + ret = isert_conn_setup_qp(isert_conn, cma_id); if (ret) goto out_conn_dev; @@ -459,6 +568,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) return 0; out_conn_dev: + if (device->use_frwr) + isert_conn_free_frwr_pool(isert_conn); +out_frwr: isert_device_try_release(device); out_rsp_dma_map: ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, @@ -482,6 +594,9 @@ isert_connect_release(struct isert_conn *isert_conn) pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); + if (device->use_frwr) + isert_conn_free_frwr_pool(isert_conn); + if (isert_conn->conn_qp) { cq_index = ((struct isert_cq_desc *) isert_conn->conn_qp->recv_cq->cq_context)->cq_index; @@ -869,46 +984,37 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, size, rx_buflen, MAX_KEY_VALUE_PAIRS); memcpy(login->req_buf, &rx_desc->data[0], size); - complete(&isert_conn->conn_login_comp); -} - -static void -isert_release_cmd(struct iscsi_cmd *cmd) -{ - struct isert_cmd *isert_cmd = container_of(cmd, struct isert_cmd, - iscsi_cmd); - - pr_debug("Entering isert_release_cmd %p >>>>>>>>>>>>>>>.\n", isert_cmd); - - kfree(cmd->buf_ptr); - kfree(cmd->tmr_req); - - kmem_cache_free(isert_cmd_cache, isert_cmd); + if (login->first_request) { + complete(&isert_conn->conn_login_comp); + return; + } + schedule_delayed_work(&conn->login_work, 0); } static struct iscsi_cmd -*isert_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp) +*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp) { struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct isert_cmd *isert_cmd; + struct iscsi_cmd *cmd; - isert_cmd = kmem_cache_zalloc(isert_cmd_cache, gfp); - if (!isert_cmd) { - pr_err("Unable to allocate isert_cmd\n"); + cmd = iscsit_allocate_cmd(conn, gfp); + if (!cmd) { + pr_err("Unable to allocate iscsi_cmd + isert_cmd\n"); return NULL; } + isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->conn = isert_conn; - isert_cmd->iscsi_cmd.release_cmd = &isert_release_cmd; + isert_cmd->iscsi_cmd = cmd; - return &isert_cmd->iscsi_cmd; + return cmd; } static int isert_handle_scsi_cmd(struct isert_conn *isert_conn, - struct isert_cmd *isert_cmd, struct iser_rx_desc *rx_desc, - unsigned char *buf) + struct isert_cmd *isert_cmd, struct iscsi_cmd *cmd, + struct iser_rx_desc *rx_desc, unsigned char *buf) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf; struct scatterlist *sg; @@ -1015,9 +1121,9 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, static int isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iser_rx_desc *rx_desc, unsigned char *buf) + struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc, + unsigned char *buf) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf; int rc; @@ -1034,9 +1140,9 @@ isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, static int isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iser_rx_desc *rx_desc, struct iscsi_text *hdr) + struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc, + struct iscsi_text *hdr) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; struct iscsi_conn *conn = isert_conn->conn; u32 payload_length = ntoh24(hdr->dlength); int rc; @@ -1081,26 +1187,26 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, switch (opcode) { case ISCSI_OP_SCSI_CMD: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); + isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->read_stag = read_stag; isert_cmd->read_va = read_va; isert_cmd->write_stag = write_stag; isert_cmd->write_va = write_va; - ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, + ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_NOOP_OUT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); - ret = isert_handle_nop_out(isert_conn, isert_cmd, + isert_cmd = iscsit_priv_cmd(cmd); + ret = isert_handle_nop_out(isert_conn, isert_cmd, cmd, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_DATA_OUT: @@ -1108,7 +1214,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_TMFUNC: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; @@ -1116,7 +1222,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_LOGOUT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; @@ -1127,12 +1233,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, HZ); break; case ISCSI_OP_TEXT: - cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn, GFP_KERNEL); if (!cmd) break; - isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd); - ret = isert_handle_text_cmd(isert_conn, isert_cmd, + isert_cmd = iscsit_priv_cmd(cmd); + ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd, rx_desc, (struct iscsi_text *)hdr); break; default: @@ -1243,26 +1349,65 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - pr_debug("isert_unmap_cmd >>>>>>>>>>>>>>>>>>>>>>>\n"); + pr_debug("isert_unmap_cmd: %p\n", isert_cmd); + if (wr->sge) { + pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd); + ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + wr->sge = NULL; + } + + if (wr->send_wr) { + pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd); + kfree(wr->send_wr); + wr->send_wr = NULL; + } + + if (wr->ib_sge) { + pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd); + kfree(wr->ib_sge); + wr->ib_sge = NULL; + } +} + +static void +isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) +{ + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + LIST_HEAD(unmap_list); + + pr_debug("unreg_frwr_cmd: %p\n", isert_cmd); + + if (wr->fr_desc) { + pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n", + isert_cmd, wr->fr_desc); + spin_lock_bh(&isert_conn->conn_lock); + list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool); + spin_unlock_bh(&isert_conn->conn_lock); + wr->fr_desc = NULL; + } if (wr->sge) { - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE); + pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd); + ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); wr->sge = NULL; } - kfree(wr->send_wr); + wr->ib_sge = NULL; wr->send_wr = NULL; - - kfree(isert_cmd->ib_sge); - isert_cmd->ib_sge = NULL; } static void isert_put_cmd(struct isert_cmd *isert_cmd) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct isert_conn *isert_conn = isert_cmd->conn; struct iscsi_conn *conn = isert_conn->conn; + struct isert_device *device = isert_conn->conn_device; pr_debug("Entering isert_put_cmd: %p\n", isert_cmd); @@ -1276,7 +1421,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) if (cmd->data_direction == DMA_TO_DEVICE) iscsit_stop_dataout_timer(cmd); - isert_unmap_cmd(isert_cmd, isert_conn); + device->unreg_rdma_mem(isert_cmd, isert_conn); transport_generic_free_cmd(&cmd->se_cmd, 0); break; case ISCSI_OP_SCSI_TMFUNC: @@ -1311,7 +1456,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) * Fall-through */ default: - isert_release_cmd(cmd); + iscsit_release_cmd(cmd); break; } } @@ -1347,27 +1492,16 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd) { struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct se_cmd *se_cmd = &cmd->se_cmd; - struct ib_device *ib_dev = isert_cmd->conn->conn_cm_id->device; + struct isert_conn *isert_conn = isert_cmd->conn; + struct isert_device *device = isert_conn->conn_device; iscsit_stop_dataout_timer(cmd); + device->unreg_rdma_mem(isert_cmd, isert_conn); + cmd->write_data_done = wr->cur_rdma_length; - if (wr->sge) { - pr_debug("isert_do_rdma_read_comp: Unmapping wr->sge from t_data_sg\n"); - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE); - wr->sge = NULL; - } - - if (isert_cmd->ib_sge) { - pr_debug("isert_do_rdma_read_comp: Freeing isert_cmd->ib_sge\n"); - kfree(isert_cmd->ib_sge); - isert_cmd->ib_sge = NULL; - } - - cmd->write_data_done = se_cmd->data_length; - - pr_debug("isert_do_rdma_read_comp, calling target_execute_cmd\n"); + pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; @@ -1383,7 +1517,7 @@ isert_do_control_comp(struct work_struct *work) struct isert_cmd, comp_work); struct isert_conn *isert_conn = isert_cmd->conn; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; switch (cmd->i_state) { case ISTATE_SEND_TASKMGTRSP: @@ -1429,7 +1563,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn, struct ib_device *ib_dev) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1621,8 +1755,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) static int isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *) @@ -1671,8 +1804,7 @@ static int isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, bool nopout_response) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1691,8 +1823,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, static int isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1710,8 +1841,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; @@ -1729,8 +1859,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; @@ -1762,8 +1891,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) { - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr; struct iscsi_text_rsp *hdr = @@ -1805,7 +1933,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, struct ib_sge *ib_sge, struct ib_send_wr *send_wr, u32 data_left, u32 offset) { - struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct scatterlist *sg_start, *tmp_sg; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; u32 sg_off, page_off; @@ -1832,8 +1960,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, ib_sg_dma_len(ib_dev, tmp_sg) - page_off); ib_sge->lkey = isert_conn->conn_mr->lkey; - pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u\n", - ib_sge->addr, ib_sge->length); + pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", + ib_sge->addr, ib_sge->length, ib_sge->lkey); page_off = 0; data_left -= ib_sge->length; ib_sge++; @@ -1847,200 +1975,373 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, } static int -isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); - struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - struct ib_send_wr *wr_failed, *send_wr; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_send_wr *send_wr; struct ib_sge *ib_sge; - struct scatterlist *sg; - u32 offset = 0, data_len, data_left, rdma_write_max; - int rc, ret = 0, count, sg_nents, i, ib_sge_cnt; - - pr_debug("RDMA_WRITE: data_length: %u\n", se_cmd->data_length); + struct scatterlist *sg_start; + u32 sg_off = 0, sg_nents; + u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0; + int ret = 0, count, i, ib_sge_cnt; + + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + data_left = se_cmd->data_length; + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + } else { + sg_off = cmd->write_data_done / PAGE_SIZE; + data_left = se_cmd->data_length - cmd->write_data_done; + offset = cmd->write_data_done; + isert_cmd->tx_desc.isert_cmd = isert_cmd; + } - sg = &se_cmd->t_data_sg[0]; - sg_nents = se_cmd->t_data_nents; + sg_start = &cmd->se_cmd.t_data_sg[sg_off]; + sg_nents = se_cmd->t_data_nents - sg_off; - count = ib_dma_map_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE); + count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(!count)) { - pr_err("Unable to map put_datain SGs\n"); + pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); return -EINVAL; } - wr->sge = sg; + wr->sge = sg_start; wr->num_sge = sg_nents; - pr_debug("Mapped IB count: %u sg: %p sg_nents: %u for RDMA_WRITE\n", - count, sg, sg_nents); + wr->cur_rdma_length = data_left; + pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, count, sg_start, sg_nents, data_left); ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); if (!ib_sge) { - pr_warn("Unable to allocate datain ib_sge\n"); + pr_warn("Unable to allocate ib_sge\n"); ret = -ENOMEM; goto unmap_sg; } - isert_cmd->ib_sge = ib_sge; - - pr_debug("Allocated ib_sge: %p from t_data_ents: %d for RDMA_WRITE\n", - ib_sge, se_cmd->t_data_nents); + wr->ib_sge = ib_sge; wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, GFP_KERNEL); if (!wr->send_wr) { - pr_err("Unable to allocate wr->send_wr\n"); + pr_debug("Unable to allocate wr->send_wr\n"); ret = -ENOMEM; goto unmap_sg; } - pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n", - wr->send_wr, wr->send_wr_num); - - iscsit_increment_maxcmdsn(cmd, conn->sess); - cmd->stat_sn = conn->stat_sn++; wr->isert_cmd = isert_cmd; rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - data_left = se_cmd->data_length; for (i = 0; i < wr->send_wr_num; i++) { send_wr = &isert_cmd->rdma_wr.send_wr[i]; data_len = min(data_left, rdma_write_max); - send_wr->opcode = IB_WR_RDMA_WRITE; send_wr->send_flags = 0; - send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; - send_wr->wr.rdma.rkey = isert_cmd->read_stag; + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + send_wr->opcode = IB_WR_RDMA_WRITE; + send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; + send_wr->wr.rdma.rkey = isert_cmd->read_stag; + if (i + 1 == wr->send_wr_num) + send_wr->next = &isert_cmd->tx_desc.send_wr; + else + send_wr->next = &wr->send_wr[i + 1]; + } else { + send_wr->opcode = IB_WR_RDMA_READ; + send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; + send_wr->wr.rdma.rkey = isert_cmd->write_stag; + if (i + 1 == wr->send_wr_num) + send_wr->send_flags = IB_SEND_SIGNALED; + else + send_wr->next = &wr->send_wr[i + 1]; + } ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, send_wr, data_len, offset); ib_sge += ib_sge_cnt; - if (i + 1 == wr->send_wr_num) - send_wr->next = &isert_cmd->tx_desc.send_wr; - else - send_wr->next = &wr->send_wr[i + 1]; - offset += data_len; + va_offset += data_len; data_left -= data_len; } - /* - * Build isert_conn->tx_desc for iSCSI response PDU and attach - */ - isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) - &isert_cmd->tx_desc.iscsi_header); - isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); - atomic_inc(&isert_conn->post_send_buf_count); + return 0; +unmap_sg: + ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + return ret; +} - rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); - if (rc) { - pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_dec(&isert_conn->post_send_buf_count); +static int +isert_map_fr_pagelist(struct ib_device *ib_dev, + struct scatterlist *sg_start, int sg_nents, u64 *fr_pl) +{ + u64 start_addr, end_addr, page, chunk_start = 0; + struct scatterlist *tmp_sg; + int i = 0, new_chunk, last_ent, n_pages; + + n_pages = 0; + new_chunk = 1; + last_ent = sg_nents - 1; + for_each_sg(sg_start, tmp_sg, sg_nents, i) { + start_addr = ib_sg_dma_address(ib_dev, tmp_sg); + if (new_chunk) + chunk_start = start_addr; + end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg); + + pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n", + i, (unsigned long long)tmp_sg->dma_address, + tmp_sg->length); + + if ((end_addr & ~PAGE_MASK) && i < last_ent) { + new_chunk = 0; + continue; + } + new_chunk = 1; + + page = chunk_start & PAGE_MASK; + do { + fr_pl[n_pages++] = page; + pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n", + n_pages - 1, page); + page += PAGE_SIZE; + } while (page < end_addr); } - pr_debug("Posted RDMA_WRITE + Response for iSER Data READ\n"); - return 1; -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE); + return n_pages; +} + +static int +isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, + struct isert_cmd *isert_cmd, struct isert_conn *isert_conn, + struct ib_sge *ib_sge, u32 offset, unsigned int data_len) +{ + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct scatterlist *sg_start; + u32 sg_off, page_off; + struct ib_send_wr fr_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + u8 key; + int ret, sg_nents, pagelist_len; + + sg_off = offset / PAGE_SIZE; + sg_start = &cmd->se_cmd.t_data_sg[sg_off]; + sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off, + ISCSI_ISER_SG_TABLESIZE); + page_off = offset % PAGE_SIZE; + + pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n", + isert_cmd, fr_desc, sg_nents, sg_off, offset); + + pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, + &fr_desc->data_frpl->page_list[0]); + + if (!fr_desc->valid) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(fr_desc->data_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(fr_desc->data_mr, ++key); + } + + /* Prepare FASTREG WR */ + memset(&fr_wr, 0, sizeof(fr_wr)); + fr_wr.opcode = IB_WR_FAST_REG_MR; + fr_wr.wr.fast_reg.iova_start = + fr_desc->data_frpl->page_list[0] + page_off; + fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl; + fr_wr.wr.fast_reg.page_list_len = pagelist_len; + fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fr_wr.wr.fast_reg.length = data_len; + fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey; + fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; + + if (!wr) + wr = &fr_wr; + else + wr->next = &fr_wr; + + ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr); + if (ret) { + pr_err("fast registration failed, ret:%d\n", ret); + return ret; + } + fr_desc->valid = false; + + ib_sge->lkey = fr_desc->data_mr->lkey; + ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off; + ib_sge->length = data_len; + + pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", + ib_sge->addr, ib_sge->length, ib_sge->lkey); + return ret; } static int -isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) +isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; - struct isert_cmd *isert_cmd = container_of(cmd, - struct isert_cmd, iscsi_cmd); - struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - struct ib_send_wr *wr_failed, *send_wr; - struct ib_sge *ib_sge; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct ib_send_wr *send_wr; + struct ib_sge *ib_sge; struct scatterlist *sg_start; - u32 sg_off, sg_nents, page_off, va_offset = 0; + struct fast_reg_descriptor *fr_desc; + u32 sg_off = 0, sg_nents; u32 offset = 0, data_len, data_left, rdma_write_max; - int rc, ret = 0, count, i, ib_sge_cnt; + int ret = 0, count; + unsigned long flags; - pr_debug("RDMA_READ: data_length: %u write_data_done: %u\n", - se_cmd->data_length, cmd->write_data_done); + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + data_left = se_cmd->data_length; + iscsit_increment_maxcmdsn(cmd, conn->sess); + cmd->stat_sn = conn->stat_sn++; + } else { + sg_off = cmd->write_data_done / PAGE_SIZE; + data_left = se_cmd->data_length - cmd->write_data_done; + offset = cmd->write_data_done; + isert_cmd->tx_desc.isert_cmd = isert_cmd; + } - sg_off = cmd->write_data_done / PAGE_SIZE; sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - page_off = cmd->write_data_done % PAGE_SIZE; - - pr_debug("RDMA_READ: sg_off: %d, sg_start: %p page_off: %d\n", - sg_off, sg_start, page_off); - - data_left = se_cmd->data_length - cmd->write_data_done; sg_nents = se_cmd->t_data_nents - sg_off; - pr_debug("RDMA_READ: data_left: %d, sg_nents: %d\n", - data_left, sg_nents); - - count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE); + count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(!count)) { - pr_err("Unable to map get_dataout SGs\n"); + pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); return -EINVAL; } wr->sge = sg_start; wr->num_sge = sg_nents; - pr_debug("Mapped IB count: %u sg_start: %p sg_nents: %u for RDMA_READ\n", - count, sg_start, sg_nents); + pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, count, sg_start, sg_nents, data_left); - ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); - if (!ib_sge) { - pr_warn("Unable to allocate dataout ib_sge\n"); - ret = -ENOMEM; - goto unmap_sg; + memset(&wr->s_ib_sge, 0, sizeof(*ib_sge)); + ib_sge = &wr->s_ib_sge; + wr->ib_sge = ib_sge; + + wr->send_wr_num = 1; + memset(&wr->s_send_wr, 0, sizeof(*send_wr)); + wr->send_wr = &wr->s_send_wr; + + wr->isert_cmd = isert_cmd; + rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE; + + send_wr = &isert_cmd->rdma_wr.s_send_wr; + send_wr->sg_list = ib_sge; + send_wr->num_sge = 1; + send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; + if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { + send_wr->opcode = IB_WR_RDMA_WRITE; + send_wr->wr.rdma.remote_addr = isert_cmd->read_va; + send_wr->wr.rdma.rkey = isert_cmd->read_stag; + send_wr->send_flags = 0; + send_wr->next = &isert_cmd->tx_desc.send_wr; + } else { + send_wr->opcode = IB_WR_RDMA_READ; + send_wr->wr.rdma.remote_addr = isert_cmd->write_va; + send_wr->wr.rdma.rkey = isert_cmd->write_stag; + send_wr->send_flags = IB_SEND_SIGNALED; } - isert_cmd->ib_sge = ib_sge; - pr_debug("Using ib_sge: %p from sg_ents: %d for RDMA_READ\n", - ib_sge, sg_nents); + data_len = min(data_left, rdma_write_max); + wr->cur_rdma_length = data_len; - wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); - wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, - GFP_KERNEL); - if (!wr->send_wr) { - pr_debug("Unable to allocate wr->send_wr\n"); - ret = -ENOMEM; + spin_lock_irqsave(&isert_conn->conn_lock, flags); + fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, + struct fast_reg_descriptor, list); + list_del(&fr_desc->list); + spin_unlock_irqrestore(&isert_conn->conn_lock, flags); + wr->fr_desc = fr_desc; + + ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, + ib_sge, offset, data_len); + if (ret) { + list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); goto unmap_sg; } - pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n", - wr->send_wr, wr->send_wr_num); - isert_cmd->tx_desc.isert_cmd = isert_cmd; + return 0; - wr->iser_ib_op = ISER_IB_RDMA_READ; - wr->isert_cmd = isert_cmd; - rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - offset = cmd->write_data_done; +unmap_sg: + ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, + (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + return ret; +} - for (i = 0; i < wr->send_wr_num; i++) { - send_wr = &isert_cmd->rdma_wr.send_wr[i]; - data_len = min(data_left, rdma_write_max); +static int +isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + struct ib_send_wr *wr_failed; + int rc; - send_wr->opcode = IB_WR_RDMA_READ; - send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; - send_wr->wr.rdma.rkey = isert_cmd->write_stag; + pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n", + isert_cmd, se_cmd->data_length); + wr->iser_ib_op = ISER_IB_RDMA_WRITE; + rc = device->reg_rdma_mem(conn, cmd, wr); + if (rc) { + pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + return rc; + } - ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, - send_wr, data_len, offset); - ib_sge += ib_sge_cnt; + /* + * Build isert_conn->tx_desc for iSCSI response PDU and attach + */ + isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); + iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) + &isert_cmd->tx_desc.iscsi_header); + isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); + isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); - if (i + 1 == wr->send_wr_num) - send_wr->send_flags = IB_SEND_SIGNALED; - else - send_wr->next = &wr->send_wr[i + 1]; + atomic_inc(&isert_conn->post_send_buf_count); - offset += data_len; - va_offset += data_len; - data_left -= data_len; + rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); + if (rc) { + pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); + atomic_dec(&isert_conn->post_send_buf_count); + } + pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n", + isert_cmd); + + return 1; +} + +static int +isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + struct ib_send_wr *wr_failed; + int rc; + + pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", + isert_cmd, se_cmd->data_length, cmd->write_data_done); + wr->iser_ib_op = ISER_IB_RDMA_READ; + rc = device->reg_rdma_mem(conn, cmd, wr); + if (rc) { + pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd); + return rc; } atomic_inc(&isert_conn->post_send_buf_count); @@ -2050,12 +2351,10 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); atomic_dec(&isert_conn->post_send_buf_count); } - pr_debug("Posted RDMA_READ memory for ISER Data WRITE\n"); - return 0; + pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", + isert_cmd); -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE); - return ret; + return 0; } static int @@ -2224,6 +2523,14 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) int ret; pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn); + /* + * For login requests after the first PDU, isert_rx_login_req() will + * kick schedule_delayed_work(&conn->login_work) as the packet is + * received, which turns this callback from iscsi_target_do_login_rx() + * into a NOP. + */ + if (!login->first_request) + return 0; ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp); if (ret) @@ -2393,12 +2700,12 @@ static void isert_free_conn(struct iscsi_conn *conn) static struct iscsit_transport iser_target_transport = { .name = "IB/iSER", .transport_type = ISCSI_INFINIBAND, + .priv_size = sizeof(struct isert_cmd), .owner = THIS_MODULE, .iscsit_setup_np = isert_setup_np, .iscsit_accept_np = isert_accept_np, .iscsit_free_np = isert_free_np, .iscsit_free_conn = isert_free_conn, - .iscsit_alloc_cmd = isert_alloc_cmd, .iscsit_get_login_rx = isert_get_login_rx, .iscsit_put_login_tx = isert_put_login_tx, .iscsit_immediate_queue = isert_immediate_queue, @@ -2425,21 +2732,10 @@ static int __init isert_init(void) goto destroy_rx_wq; } - isert_cmd_cache = kmem_cache_create("isert_cmd_cache", - sizeof(struct isert_cmd), __alignof__(struct isert_cmd), - 0, NULL); - if (!isert_cmd_cache) { - pr_err("Unable to create isert_cmd_cache\n"); - ret = -ENOMEM; - goto destroy_tx_cq; - } - iscsit_register_transport(&iser_target_transport); pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n"); return 0; -destroy_tx_cq: - destroy_workqueue(isert_comp_wq); destroy_rx_wq: destroy_workqueue(isert_rx_wq); return ret; @@ -2447,7 +2743,6 @@ destroy_rx_wq: static void __exit isert_exit(void) { - kmem_cache_destroy(isert_cmd_cache); destroy_workqueue(isert_comp_wq); destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 191117b5b50..631f2090f0b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -5,6 +5,7 @@ #include <rdma/rdma_cm.h> #define ISERT_RDMA_LISTEN_BACKLOG 10 +#define ISCSI_ISER_SG_TABLESIZE 256 enum isert_desc_type { ISCSI_TX_CONTROL, @@ -45,15 +46,26 @@ struct iser_tx_desc { struct ib_send_wr send_wr; } __packed; +struct fast_reg_descriptor { + struct list_head list; + struct ib_mr *data_mr; + struct ib_fast_reg_page_list *data_frpl; + bool valid; +}; + struct isert_rdma_wr { struct list_head wr_list; struct isert_cmd *isert_cmd; enum iser_ib_op_code iser_ib_op; struct ib_sge *ib_sge; + struct ib_sge s_ib_sge; int num_sge; struct scatterlist *sge; int send_wr_num; struct ib_send_wr *send_wr; + struct ib_send_wr s_send_wr; + u32 cur_rdma_length; + struct fast_reg_descriptor *fr_desc; }; struct isert_cmd { @@ -67,8 +79,7 @@ struct isert_cmd { u32 write_va_off; u32 rdma_wr_num; struct isert_conn *conn; - struct iscsi_cmd iscsi_cmd; - struct ib_sge *ib_sge; + struct iscsi_cmd *iscsi_cmd; struct iser_tx_desc tx_desc; struct isert_rdma_wr rdma_wr; struct work_struct comp_work; @@ -106,6 +117,10 @@ struct isert_conn { wait_queue_head_t conn_wait; wait_queue_head_t conn_wait_comp_err; struct kref conn_kref; + struct list_head conn_frwr_pool; + int conn_frwr_pool_size; + /* lock to protect frwr_pool */ + spinlock_t conn_lock; }; #define ISERT_MAX_CQ 64 @@ -118,6 +133,7 @@ struct isert_cq_desc { }; struct isert_device { + int use_frwr; int cqs_used; int refcount; int cq_active_qps[ISERT_MAX_CQ]; @@ -128,6 +144,12 @@ struct isert_device { struct ib_cq *dev_tx_cq[ISERT_MAX_CQ]; struct isert_cq_desc *cq_desc; struct list_head dev_node; + struct ib_device_attr dev_attr; + int (*reg_rdma_mem)(struct iscsi_conn *conn, + struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); + void (*unreg_rdma_mem)(struct isert_cmd *isert_cmd, + struct isert_conn *isert_conn); }; struct isert_np { diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ff12d4677cc..596480022b0 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -10,7 +10,7 @@ * * Forward port and refactoring to modern qla2xxx and target/configfs * - * Copyright (C) 2010-2011 Nicholas A. Bellinger <nab@kernel.org> + * Copyright (C) 2010-2013 Nicholas A. Bellinger <nab@kernel.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index a6da313e253..f85b9e5c1f0 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -2,12 +2,9 @@ * This file contains tcm implementation using v4 configfs fabric infrastructure * for QLogic target mode HBAs * - * ?? Copyright 2010-2011 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * - * Licensed to the Linux Foundation under the General Public License (GPL) - * version 2. - * - * Author: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Author: Nicholas A. Bellinger <nab@daterainc.com> * * tcm_qla2xxx_parse_wwn() and tcm_qla2xxx_format_wwn() contains code from * the TCM_FC / Open-FCoE.org fabric module. @@ -360,6 +357,14 @@ static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg) return QLA_TPG_ATTRIB(tpg)->prod_mode_write_protect; } +static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg) +{ + struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, + struct tcm_qla2xxx_tpg, se_tpg); + + return QLA_TPG_ATTRIB(tpg)->demo_mode_login_only; +} + static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl( struct se_portal_group *se_tpg) { @@ -489,38 +494,13 @@ static u32 tcm_qla2xxx_sess_get_index(struct se_session *se_sess) return 0; } -/* - * The LIO target core uses DMA_TO_DEVICE to mean that data is going - * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean - * that data is coming from the target (eg handling a READ). However, - * this is just the opposite of what we have to tell the DMA mapping - * layer -- eg when handling a READ, the HBA will have to DMA the data - * out of memory so it can send it to the initiator, which means we - * need to use DMA_TO_DEVICE when we map the data. - */ -static enum dma_data_direction tcm_qla2xxx_mapping_dir(struct se_cmd *se_cmd) -{ - if (se_cmd->se_cmd_flags & SCF_BIDI) - return DMA_BIDIRECTIONAL; - - switch (se_cmd->data_direction) { - case DMA_TO_DEVICE: - return DMA_FROM_DEVICE; - case DMA_FROM_DEVICE: - return DMA_TO_DEVICE; - case DMA_NONE: - default: - return DMA_NONE; - } -} - static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); cmd->bufflen = se_cmd->data_length; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->sg_cnt = se_cmd->t_data_nents; cmd->sg = se_cmd->t_data_sg; @@ -656,7 +636,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) struct qla_tgt_cmd, se_cmd); cmd->bufflen = se_cmd->data_length; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); cmd->sg_cnt = se_cmd->t_data_nents; @@ -680,7 +660,7 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg = NULL; cmd->sg_cnt = 0; cmd->offset = 0; - cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd); + cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED); if (se_cmd->data_direction == DMA_FROM_DEVICE) { @@ -939,11 +919,19 @@ DEF_QLA_TPG_ATTR_BOOL(prod_mode_write_protect); DEF_QLA_TPG_ATTRIB(prod_mode_write_protect); QLA_TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR); +/* + * Define tcm_qla2xxx_tpg_attrib_s_demo_mode_login_only + */ +DEF_QLA_TPG_ATTR_BOOL(demo_mode_login_only); +DEF_QLA_TPG_ATTRIB(demo_mode_login_only); +QLA_TPG_ATTR(demo_mode_login_only, S_IRUGO | S_IWUSR); + static struct configfs_attribute *tcm_qla2xxx_tpg_attrib_attrs[] = { &tcm_qla2xxx_tpg_attrib_generate_node_acls.attr, &tcm_qla2xxx_tpg_attrib_cache_dynamic_acls.attr, &tcm_qla2xxx_tpg_attrib_demo_mode_write_protect.attr, &tcm_qla2xxx_tpg_attrib_prod_mode_write_protect.attr, + &tcm_qla2xxx_tpg_attrib_demo_mode_login_only.attr, NULL, }; @@ -1042,6 +1030,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg( QLA_TPG_ATTRIB(tpg)->generate_node_acls = 1; QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect = 1; QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls = 1; + QLA_TPG_ATTRIB(tpg)->demo_mode_login_only = 1; ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn, &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL); @@ -1736,7 +1725,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = { tcm_qla2xxx_check_demo_write_protect, .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_prod_write_protect, - .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true, + .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, @@ -1784,7 +1773,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .tpg_check_demo_mode_cache = tcm_qla2xxx_check_true, .tpg_check_demo_mode_write_protect = tcm_qla2xxx_check_true, .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_false, - .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true, + .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only, .tpg_alloc_fabric_acl = tcm_qla2xxx_alloc_fabric_acl, .tpg_release_fabric_acl = tcm_qla2xxx_release_fabric_acl, .tpg_get_inst_index = tcm_qla2xxx_tpg_get_inst_index, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 9ba075fe978..329327528a5 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -29,6 +29,7 @@ struct tcm_qla2xxx_tpg_attrib { int cache_dynamic_acls; int demo_mode_write_protect; int prod_mode_write_protect; + int demo_mode_login_only; }; struct tcm_qla2xxx_tpg { diff --git a/drivers/target/Makefile b/drivers/target/Makefile index 9fdcb561422..85b012d2f89 100644 --- a/drivers/target/Makefile +++ b/drivers/target/Makefile @@ -13,7 +13,8 @@ target_core_mod-y := target_core_configfs.o \ target_core_spc.o \ target_core_ua.o \ target_core_rd.o \ - target_core_stat.o + target_core_stat.o \ + target_core_xcopy.o obj-$(CONFIG_TARGET_CORE) += target_core_mod.o diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3a179302b90..35b61f7d6c6 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to the iSCSI Target Core Driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -63,7 +61,6 @@ spinlock_t sess_idr_lock; struct iscsit_global *iscsit_global; -struct kmem_cache *lio_cmd_cache; struct kmem_cache *lio_qr_cache; struct kmem_cache *lio_dr_cache; struct kmem_cache *lio_ooo_cache; @@ -220,11 +217,6 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) spin_unlock_bh(&np->np_thread_lock); return -1; } - if (np->np_login_tpg) { - pr_err("np->np_login_tpg() is not NULL!\n"); - spin_unlock_bh(&np->np_thread_lock); - return -1; - } spin_unlock_bh(&np->np_thread_lock); /* * Determine if the portal group is accepting storage traffic. @@ -239,26 +231,38 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) /* * Here we serialize access across the TIQN+TPG Tuple. */ - ret = mutex_lock_interruptible(&tpg->np_login_lock); + ret = down_interruptible(&tpg->np_login_sem); if ((ret != 0) || signal_pending(current)) return -1; - spin_lock_bh(&np->np_thread_lock); - np->np_login_tpg = tpg; - spin_unlock_bh(&np->np_thread_lock); + spin_lock_bh(&tpg->tpg_state_lock); + if (tpg->tpg_state != TPG_STATE_ACTIVE) { + spin_unlock_bh(&tpg->tpg_state_lock); + up(&tpg->np_login_sem); + return -1; + } + spin_unlock_bh(&tpg->tpg_state_lock); return 0; } -int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) +void iscsit_login_kref_put(struct kref *kref) +{ + struct iscsi_tpg_np *tpg_np = container_of(kref, + struct iscsi_tpg_np, tpg_np_kref); + + complete(&tpg_np->tpg_np_comp); +} + +int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg, + struct iscsi_tpg_np *tpg_np) { struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; - spin_lock_bh(&np->np_thread_lock); - np->np_login_tpg = NULL; - spin_unlock_bh(&np->np_thread_lock); + up(&tpg->np_login_sem); - mutex_unlock(&tpg->np_login_lock); + if (tpg_np) + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); if (tiqn) iscsit_put_tiqn_for_login(tiqn); @@ -410,20 +414,10 @@ struct iscsi_np *iscsit_add_np( int iscsit_reset_np_thread( struct iscsi_np *np, struct iscsi_tpg_np *tpg_np, - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { spin_lock_bh(&np->np_thread_lock); - if (tpg && tpg_np) { - /* - * The reset operation need only be performed when the - * passed struct iscsi_portal_group has a login in progress - * to one of the network portals. - */ - if (tpg_np->tpg_np->np_login_tpg != tpg) { - spin_unlock_bh(&np->np_thread_lock); - return 0; - } - } if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) { spin_unlock_bh(&np->np_thread_lock); return 0; @@ -438,6 +432,12 @@ int iscsit_reset_np_thread( } spin_unlock_bh(&np->np_thread_lock); + if (tpg_np && shutdown) { + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); + + wait_for_completion(&tpg_np->tpg_np_comp); + } + return 0; } @@ -497,7 +497,6 @@ static struct iscsit_transport iscsi_target_transport = { .iscsit_setup_np = iscsit_setup_np, .iscsit_accept_np = iscsit_accept_np, .iscsit_free_np = iscsit_free_np, - .iscsit_alloc_cmd = iscsit_alloc_cmd, .iscsit_get_login_rx = iscsit_get_login_rx, .iscsit_put_login_tx = iscsit_put_login_tx, .iscsit_get_dataout = iscsit_build_r2ts_for_cmd, @@ -538,22 +537,13 @@ static int __init iscsi_target_init_module(void) goto ts_out1; } - lio_cmd_cache = kmem_cache_create("lio_cmd_cache", - sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd), - 0, NULL); - if (!lio_cmd_cache) { - pr_err("Unable to kmem_cache_create() for" - " lio_cmd_cache\n"); - goto ts_out2; - } - lio_qr_cache = kmem_cache_create("lio_qr_cache", sizeof(struct iscsi_queue_req), __alignof__(struct iscsi_queue_req), 0, NULL); if (!lio_qr_cache) { pr_err("nable to kmem_cache_create() for" " lio_qr_cache\n"); - goto cmd_out; + goto ts_out2; } lio_dr_cache = kmem_cache_create("lio_dr_cache", @@ -597,8 +587,6 @@ dr_out: kmem_cache_destroy(lio_dr_cache); qr_out: kmem_cache_destroy(lio_qr_cache); -cmd_out: - kmem_cache_destroy(lio_cmd_cache); ts_out2: iscsi_deallocate_thread_sets(); ts_out1: @@ -616,7 +604,6 @@ static void __exit iscsi_target_cleanup_module(void) iscsi_thread_set_free(); iscsit_release_discovery_tpg(); iscsit_unregister_transport(&iscsi_target_transport); - kmem_cache_destroy(lio_cmd_cache); kmem_cache_destroy(lio_qr_cache); kmem_cache_destroy(lio_dr_cache); kmem_cache_destroy(lio_ooo_cache); @@ -3447,12 +3434,10 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) bool inaddr_any = iscsit_check_inaddr_any(np); len = sprintf(buf, "TargetAddress=" - "%s%s%s:%hu,%hu", - (np->np_sockaddr.ss_family == AF_INET6) ? - "[" : "", (inaddr_any == false) ? + "%s:%hu,%hu", + (inaddr_any == false) ? np->np_ip : conn->local_ip, - (np->np_sockaddr.ss_family == AF_INET6) ? - "]" : "", (inaddr_any == false) ? + (inaddr_any == false) ? np->np_port : conn->local_port, tpg->tpgt); len += 1; diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h index 2c437cb8ca0..e936d56fb52 100644 --- a/drivers/target/iscsi/iscsi_target.h +++ b/drivers/target/iscsi/iscsi_target.h @@ -7,13 +7,15 @@ extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *); extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *); extern void iscsit_del_tiqn(struct iscsi_tiqn *); extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *); -extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *); +extern void iscsit_login_kref_put(struct kref *); +extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *, + struct iscsi_tpg_np *); extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *, struct iscsi_np *, int); extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, char *, int); extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, - struct iscsi_portal_group *); + struct iscsi_portal_group *, bool); extern int iscsit_del_np(struct iscsi_np *); extern int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8, unsigned char *); extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); @@ -37,7 +39,6 @@ extern struct target_fabric_configfs *lio_target_fabric_configfs; extern struct kmem_cache *lio_dr_cache; extern struct kmem_cache *lio_ooo_cache; -extern struct kmem_cache *lio_cmd_cache; extern struct kmem_cache *lio_qr_cache; extern struct kmem_cache *lio_r2t_cache; diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index cee17543278..7505fddca15 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file houses the main functions for the iSCSI CHAP support * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index bbfd2889316..fd145259361 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -2,9 +2,7 @@ * This file contains the configfs implementation for iSCSI Target mode * from the LIO-Target Project. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -265,9 +263,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg( *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - ret = strict_strtoul(port_str, 0, &port); + ret = kstrtoul(port_str, 0, &port); if (ret < 0) { - pr_err("strict_strtoul() failed for port_str: %d\n", ret); + pr_err("kstrtoul() failed for port_str: %d\n", ret); return ERR_PTR(ret); } sock_in6 = (struct sockaddr_in6 *)&sockaddr; @@ -290,9 +288,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg( *port_str = '\0'; /* Terminate string for IP */ port_str++; /* Skip over ":" */ - ret = strict_strtoul(port_str, 0, &port); + ret = kstrtoul(port_str, 0, &port); if (ret < 0) { - pr_err("strict_strtoul() failed for port_str: %d\n", ret); + pr_err("kstrtoul() failed for port_str: %d\n", ret); return ERR_PTR(ret); } sock_in = (struct sockaddr_in *)&sockaddr; @@ -1481,7 +1479,7 @@ static ssize_t lio_target_wwn_show_attr_lio_version( struct target_fabric_configfs *tf, char *page) { - return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n"); + return sprintf(page, "Datera Inc. iSCSI Target "ISCSIT_VERSION"\n"); } TF_WWN_ATTR_RO(lio_target, lio_version); @@ -1925,7 +1923,7 @@ static void lio_release_cmd(struct se_cmd *se_cmd) struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); pr_debug("Entering lio_release_cmd for se_cmd: %p\n", se_cmd); - cmd->release_cmd(cmd); + iscsit_release_cmd(cmd); } /* End functions for target_core_fabric_ops */ diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 4f77a78edef..9a5721b8ff9 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -9,7 +9,7 @@ #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> -#define ISCSIT_VERSION "v4.1.0-rc2" +#define ISCSIT_VERSION "v4.1.0" #define ISCSI_MAX_DATASN_MISSING_COUNT 16 #define ISCSI_TX_THREAD_TCP_TIMEOUT 2 #define ISCSI_RX_THREAD_TCP_TIMEOUT 2 @@ -17,6 +17,9 @@ #define SECONDS_FOR_ASYNC_TEXT 10 #define SECONDS_FOR_LOGOUT_COMP 15 #define WHITE_SPACE " \t\v\f\n\r" +#define ISCSIT_MIN_TAGS 16 +#define ISCSIT_EXTRA_TAGS 8 +#define ISCSIT_TCP_BACKLOG 256 /* struct iscsi_node_attrib sanity values */ #define NA_DATAOUT_TIMEOUT 3 @@ -47,7 +50,7 @@ #define TA_NETIF_TIMEOUT_MAX 15 #define TA_NETIF_TIMEOUT_MIN 2 #define TA_GENERATE_NODE_ACLS 0 -#define TA_DEFAULT_CMDSN_DEPTH 16 +#define TA_DEFAULT_CMDSN_DEPTH 64 #define TA_DEFAULT_CMDSN_DEPTH_MAX 512 #define TA_DEFAULT_CMDSN_DEPTH_MIN 1 #define TA_CACHE_DYNAMIC_ACLS 0 @@ -489,7 +492,6 @@ struct iscsi_cmd { u32 first_data_sg_off; u32 kmapped_nents; sense_reason_t sense_reason; - void (*release_cmd)(struct iscsi_cmd *); } ____cacheline_aligned; struct iscsi_tmr_req { @@ -554,9 +556,19 @@ struct iscsi_conn { struct completion rx_half_close_comp; /* socket used by this connection */ struct socket *sock; + void (*orig_data_ready)(struct sock *, int); + void (*orig_state_change)(struct sock *); +#define LOGIN_FLAGS_READ_ACTIVE 1 +#define LOGIN_FLAGS_CLOSED 2 +#define LOGIN_FLAGS_READY 4 + unsigned long login_flags; + struct delayed_work login_work; + struct delayed_work login_cleanup_work; + struct iscsi_login *login; struct timer_list nopin_timer; struct timer_list nopin_response_timer; struct timer_list transport_timer; + struct task_struct *login_kworker; /* Spinlock used for add/deleting cmd's from conn_cmd_list */ spinlock_t cmd_lock; spinlock_t conn_usage_lock; @@ -584,6 +596,7 @@ struct iscsi_conn { void *context; struct iscsi_login_thread_s *login_thread; struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np; /* Pointer to parent session */ struct iscsi_session *sess; /* Pointer to thread_set in use for this conn's threads */ @@ -682,6 +695,7 @@ struct iscsi_login { u8 version_max; u8 login_complete; u8 login_failed; + bool zero_tsih; char isid[6]; u32 cmd_sn; itt_t init_task_tag; @@ -694,6 +708,7 @@ struct iscsi_login { char *req_buf; char *rsp_buf; struct iscsi_conn *conn; + struct iscsi_np *np; } ____cacheline_aligned; struct iscsi_node_attrib { @@ -773,7 +788,6 @@ struct iscsi_np { struct __kernel_sockaddr_storage np_sockaddr; struct task_struct *np_thread; struct timer_list np_login_timer; - struct iscsi_portal_group *np_login_tpg; void *np_context; struct iscsit_transport *np_transport; struct list_head np_list; @@ -788,6 +802,8 @@ struct iscsi_tpg_np { struct list_head tpg_np_parent_list; struct se_tpg_np se_tpg_np; spinlock_t tpg_np_parent_lock; + struct completion tpg_np_comp; + struct kref tpg_np_kref; }; struct iscsi_portal_group { @@ -809,7 +825,7 @@ struct iscsi_portal_group { spinlock_t tpg_state_lock; struct se_portal_group tpg_se_tpg; struct mutex tpg_access_lock; - struct mutex np_login_lock; + struct semaphore np_login_sem; struct iscsi_tpg_attrib tpg_attrib; struct iscsi_node_auth tpg_demo_auth; /* Pointer to default list of iSCSI parameters for TPG */ diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c index 848fee76894..e93d5a7a3f8 100644 --- a/drivers/target/iscsi/iscsi_target_datain_values.c +++ b/drivers/target/iscsi/iscsi_target_datain_values.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target DataIN value generation functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c index 1b74033510a..6c7a5104a4c 100644 --- a/drivers/target/iscsi/iscsi_target_device.c +++ b/drivers/target/iscsi/iscsi_target_device.c @@ -2,9 +2,7 @@ * This file contains the iSCSI Virtual Device and Disk Transport * agnostic related functions. * - \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 08bd8783332..41052e512d9 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -2,9 +2,7 @@ * This file contains error recovery level zero functions used by * the iSCSI Target driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 586c268679a..e048d6439f4 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains error recovery level one used by the iSCSI Target driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index 45a5afd5ea1..33be1fb1df3 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -2,9 +2,7 @@ * This file contains error recovery level two functions used by * the iSCSI Target driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index bc788c52b6c..1794c753954 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the login functions used by the iSCSI Target driver. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -50,6 +48,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn) pr_err("Unable to allocate memory for struct iscsi_login.\n"); return NULL; } + conn->login = login; login->conn = conn; login->first_request = 1; @@ -428,7 +427,7 @@ static int iscsi_login_zero_tsih_s2( ISCSI_LOGIN_STATUS_NO_RESOURCES); return -1; } - rc = strict_strtoul(param->value, 0, &mrdsl); + rc = kstrtoul(param->value, 0, &mrdsl); if (rc < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); @@ -684,7 +683,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) iscsit_start_nopin_timer(conn); } -static int iscsi_post_login_handler( +int iscsi_post_login_handler( struct iscsi_np *np, struct iscsi_conn *conn, u8 zero_tsih) @@ -872,7 +871,7 @@ int iscsit_setup_np( struct __kernel_sockaddr_storage *sockaddr) { struct socket *sock = NULL; - int backlog = 5, ret, opt = 0, len; + int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len; switch (np->np_network_transport) { case ISCSI_TCP: @@ -1007,16 +1006,24 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 1); if (!rc) { - snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c", - &sock_in6.sin6_addr.in6_u); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) + snprintf(conn->login_ip, sizeof(conn->login_ip), "[%pI6c]", + &sock_in6.sin6_addr.in6_u); + else + snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI4", + &sock_in6.sin6_addr.s6_addr32[3]); conn->login_port = ntohs(sock_in6.sin6_port); } rc = conn->sock->ops->getname(conn->sock, (struct sockaddr *)&sock_in6, &err, 0); if (!rc) { - snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c", - &sock_in6.sin6_addr.in6_u); + if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) + snprintf(conn->local_ip, sizeof(conn->local_ip), "[%pI6c]", + &sock_in6.sin6_addr.in6_u); + else + snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI4", + &sock_in6.sin6_addr.s6_addr32[3]); conn->local_port = ntohs(sock_in6.sin6_port); } } else { @@ -1116,6 +1123,77 @@ iscsit_conn_set_transport(struct iscsi_conn *conn, struct iscsit_transport *t) return 0; } +void iscsi_target_login_sess_out(struct iscsi_conn *conn, + struct iscsi_np *np, bool zero_tsih, bool new_sess) +{ + if (new_sess == false) + goto old_sess_out; + + pr_err("iSCSI Login negotiation failed.\n"); + iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + if (!zero_tsih || !conn->sess) + goto old_sess_out; + if (conn->sess->se_sess) + transport_free_session(conn->sess->se_sess); + if (conn->sess->session_index != 0) { + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, conn->sess->session_index); + spin_unlock_bh(&sess_idr_lock); + } + kfree(conn->sess->sess_ops); + kfree(conn->sess); + +old_sess_out: + iscsi_stop_login_thread_timer(np); + /* + * If login negotiation fails check if the Time2Retain timer + * needs to be restarted. + */ + if (!zero_tsih && conn->sess) { + spin_lock_bh(&conn->sess->conn_lock); + if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { + struct se_portal_group *se_tpg = + &ISCSI_TPG_C(conn)->tpg_se_tpg; + + atomic_set(&conn->sess->session_continuation, 0); + spin_unlock_bh(&conn->sess->conn_lock); + spin_lock_bh(&se_tpg->session_lock); + iscsit_start_time2retain_handler(conn->sess); + spin_unlock_bh(&se_tpg->session_lock); + } else + spin_unlock_bh(&conn->sess->conn_lock); + iscsit_dec_session_usage_count(conn->sess); + } + + if (!IS_ERR(conn->conn_rx_hash.tfm)) + crypto_free_hash(conn->conn_rx_hash.tfm); + if (!IS_ERR(conn->conn_tx_hash.tfm)) + crypto_free_hash(conn->conn_tx_hash.tfm); + + if (conn->conn_cpumask) + free_cpumask_var(conn->conn_cpumask); + + kfree(conn->conn_ops); + + if (conn->param_list) { + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + } + iscsi_target_nego_release(conn); + + if (conn->sock) { + sock_release(conn->sock); + conn->sock = NULL; + } + + if (conn->conn_transport->iscsit_free_conn) + conn->conn_transport->iscsit_free_conn(conn); + + iscsit_put_transport(conn->conn_transport); + kfree(conn); +} + static int __iscsi_target_login_thread(struct iscsi_np *np) { u8 *buffer, zero_tsih = 0; @@ -1124,6 +1202,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) struct iscsi_login *login; struct iscsi_portal_group *tpg = NULL; struct iscsi_login_req *pdu; + struct iscsi_tpg_np *tpg_np; + bool new_sess = false; flush_signals(current); @@ -1264,6 +1344,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) tpg = conn->tpg; goto new_sess_out; } + login->zero_tsih = zero_tsih; tpg = conn->tpg; if (!tpg) { @@ -1279,7 +1360,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto old_sess_out; } - if (iscsi_target_start_negotiation(login, conn) < 0) + ret = iscsi_target_start_negotiation(login, conn); + if (ret < 0) goto new_sess_out; if (!conn->sess) { @@ -1292,84 +1374,32 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (signal_pending(current)) goto new_sess_out; - ret = iscsi_post_login_handler(np, conn, zero_tsih); + if (ret == 1) { + tpg_np = conn->tpg_np; - if (ret < 0) - goto new_sess_out; + ret = iscsi_post_login_handler(np, conn, zero_tsih); + if (ret < 0) + goto new_sess_out; + + iscsit_deaccess_np(np, tpg, tpg_np); + } - iscsit_deaccess_np(np, tpg); tpg = NULL; + tpg_np = NULL; /* Get another socket */ return 1; new_sess_out: - pr_err("iSCSI Login negotiation failed.\n"); - iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, - ISCSI_LOGIN_STATUS_INIT_ERR); - if (!zero_tsih || !conn->sess) - goto old_sess_out; - if (conn->sess->se_sess) - transport_free_session(conn->sess->se_sess); - if (conn->sess->session_index != 0) { - spin_lock_bh(&sess_idr_lock); - idr_remove(&sess_idr, conn->sess->session_index); - spin_unlock_bh(&sess_idr_lock); - } - kfree(conn->sess->sess_ops); - kfree(conn->sess); + new_sess = true; old_sess_out: - iscsi_stop_login_thread_timer(np); - /* - * If login negotiation fails check if the Time2Retain timer - * needs to be restarted. - */ - if (!zero_tsih && conn->sess) { - spin_lock_bh(&conn->sess->conn_lock); - if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { - struct se_portal_group *se_tpg = - &ISCSI_TPG_C(conn)->tpg_se_tpg; - - atomic_set(&conn->sess->session_continuation, 0); - spin_unlock_bh(&conn->sess->conn_lock); - spin_lock_bh(&se_tpg->session_lock); - iscsit_start_time2retain_handler(conn->sess); - spin_unlock_bh(&se_tpg->session_lock); - } else - spin_unlock_bh(&conn->sess->conn_lock); - iscsit_dec_session_usage_count(conn->sess); - } - - if (!IS_ERR(conn->conn_rx_hash.tfm)) - crypto_free_hash(conn->conn_rx_hash.tfm); - if (!IS_ERR(conn->conn_tx_hash.tfm)) - crypto_free_hash(conn->conn_tx_hash.tfm); - - if (conn->conn_cpumask) - free_cpumask_var(conn->conn_cpumask); - - kfree(conn->conn_ops); - - if (conn->param_list) { - iscsi_release_param_list(conn->param_list); - conn->param_list = NULL; - } - iscsi_target_nego_release(conn); - - if (conn->sock) { - sock_release(conn->sock); - conn->sock = NULL; - } - - if (conn->conn_transport->iscsit_free_conn) - conn->conn_transport->iscsit_free_conn(conn); - - iscsit_put_transport(conn->conn_transport); - - kfree(conn); + tpg_np = conn->tpg_np; + iscsi_target_login_sess_out(conn, np, zero_tsih, new_sess); + new_sess = false; if (tpg) { - iscsit_deaccess_np(np, tpg); + iscsit_deaccess_np(np, tpg, tpg_np); tpg = NULL; + tpg_np = NULL; } out: diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 63efd287845..29d098324b7 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -12,6 +12,9 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *); extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *); extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *); +extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); +extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, + bool, bool); extern int iscsi_target_login_thread(void *); extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index c4675b4ceb4..14d1aed5af1 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to iSCSI Parameter negotiation. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -377,15 +375,284 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log return 0; } -static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login) +static void iscsi_target_sk_data_ready(struct sock *sk, int count) { - if (iscsi_target_do_tx_login_io(conn, login) < 0) - return -1; + struct iscsi_conn *conn = sk->sk_user_data; + bool rc; - if (conn->conn_transport->iscsit_get_login_rx(conn, login) < 0) - return -1; + pr_debug("Entering iscsi_target_sk_data_ready: conn: %p\n", conn); - return 0; + write_lock_bh(&sk->sk_callback_lock); + if (!sk->sk_user_data) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_READY=0, conn: %p >>>>\n", conn); + return; + } + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_CLOSED=1, conn: %p >>>>\n", conn); + return; + } + if (test_and_set_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { + write_unlock_bh(&sk->sk_callback_lock); + pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1, conn: %p >>>>\n", conn); + return; + } + + rc = schedule_delayed_work(&conn->login_work, 0); + if (rc == false) { + pr_debug("iscsi_target_sk_data_ready, schedule_delayed_work" + " got false\n"); + } + write_unlock_bh(&sk->sk_callback_lock); +} + +static void iscsi_target_sk_state_change(struct sock *); + +static void iscsi_target_set_sock_callbacks(struct iscsi_conn *conn) +{ + struct sock *sk; + + if (!conn->sock) + return; + + sk = conn->sock->sk; + pr_debug("Entering iscsi_target_set_sock_callbacks: conn: %p\n", conn); + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = conn; + conn->orig_data_ready = sk->sk_data_ready; + conn->orig_state_change = sk->sk_state_change; + sk->sk_data_ready = iscsi_target_sk_data_ready; + sk->sk_state_change = iscsi_target_sk_state_change; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_sndtimeo = TA_LOGIN_TIMEOUT * HZ; + sk->sk_rcvtimeo = TA_LOGIN_TIMEOUT * HZ; +} + +static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) +{ + struct sock *sk; + + if (!conn->sock) + return; + + sk = conn->sock->sk; + pr_debug("Entering iscsi_target_restore_sock_callbacks: conn: %p\n", conn); + + write_lock_bh(&sk->sk_callback_lock); + if (!sk->sk_user_data) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + sk->sk_user_data = NULL; + sk->sk_data_ready = conn->orig_data_ready; + sk->sk_state_change = conn->orig_state_change; + write_unlock_bh(&sk->sk_callback_lock); + + sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; +} + +static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); + +static bool iscsi_target_sk_state_check(struct sock *sk) +{ + if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { + pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + "returning FALSE\n"); + return false; + } + return true; +} + +static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) +{ + struct iscsi_np *np = login->np; + bool zero_tsih = login->zero_tsih; + + iscsi_remove_failed_auth_entry(conn); + iscsi_target_nego_release(conn); + iscsi_target_login_sess_out(conn, np, zero_tsih, true); +} + +static void iscsi_target_login_timeout(unsigned long data) +{ + struct iscsi_conn *conn = (struct iscsi_conn *)data; + + pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n"); + + if (conn->login_kworker) { + pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n", + conn->login_kworker->comm, conn->login_kworker->pid); + send_sig(SIGINT, conn->login_kworker, 1); + } +} + +static void iscsi_target_do_login_rx(struct work_struct *work) +{ + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, login_work.work); + struct iscsi_login *login = conn->login; + struct iscsi_np *np = login->np; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_tpg_np *tpg_np = conn->tpg_np; + struct timer_list login_timer; + int rc, zero_tsih = login->zero_tsih; + bool state; + + pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", + conn, current->comm, current->pid); + + spin_lock(&tpg->tpg_state_lock); + state = (tpg->tpg_state == TPG_STATE_ACTIVE); + spin_unlock(&tpg->tpg_state_lock); + + if (state == false) { + pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = iscsi_target_sk_state_check(sk); + read_unlock_bh(&sk->sk_callback_lock); + + if (state == false) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + } + + conn->login_kworker = current; + allow_signal(SIGINT); + + init_timer(&login_timer); + login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ); + login_timer.data = (unsigned long)conn; + login_timer.function = iscsi_target_login_timeout; + add_timer(&login_timer); + pr_debug("Starting login_timer for %s/%d\n", current->comm, current->pid); + + rc = conn->conn_transport->iscsit_get_login_rx(conn, login); + del_timer_sync(&login_timer); + flush_signals(current); + conn->login_kworker = NULL; + + if (rc < 0) { + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + return; + } + + pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", + conn, current->comm, current->pid); + + rc = iscsi_target_do_login(conn, login); + if (rc < 0) { + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + } else if (!rc) { + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + } else if (rc == 1) { + iscsi_target_nego_release(conn); + iscsi_post_login_handler(np, conn, zero_tsih); + iscsit_deaccess_np(np, tpg, tpg_np); + } +} + +static void iscsi_target_do_cleanup(struct work_struct *work) +{ + struct iscsi_conn *conn = container_of(work, + struct iscsi_conn, login_cleanup_work.work); + struct sock *sk = conn->sock->sk; + struct iscsi_login *login = conn->login; + struct iscsi_np *np = login->np; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_tpg_np *tpg_np = conn->tpg_np; + + pr_debug("Entering iscsi_target_do_cleanup\n"); + + cancel_delayed_work_sync(&conn->login_work); + conn->orig_state_change(sk); + + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); + + pr_debug("iscsi_target_do_cleanup done()\n"); +} + +static void iscsi_target_sk_state_change(struct sock *sk) +{ + struct iscsi_conn *conn; + void (*orig_state_change)(struct sock *); + bool state; + + pr_debug("Entering iscsi_target_sk_state_change\n"); + + write_lock_bh(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (!conn) { + write_unlock_bh(&sk->sk_callback_lock); + return; + } + orig_state_change = conn->orig_state_change; + + if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_READY=0 sk_state_change conn: %p\n", + conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" + " conn: %p\n", conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", + conn); + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); + return; + } + + state = iscsi_target_sk_state_check(sk); + write_unlock_bh(&sk->sk_callback_lock); + + pr_debug("iscsi_target_sk_state_change: state: %d\n", state); + + if (!state) { + pr_debug("iscsi_target_sk_state_change got failed state\n"); + schedule_delayed_work(&conn->login_cleanup_work, 0); + return; + } + orig_state_change(sk); } /* @@ -643,10 +910,11 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { login->tsih = conn->sess->tsih; login->login_complete = 1; + iscsi_target_restore_sock_callbacks(conn); if (iscsi_target_do_tx_login_io(conn, login) < 0) return -1; - return 0; + return 1; } break; default: @@ -656,13 +924,29 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (iscsi_target_do_login_io(conn, login) < 0) + if (iscsi_target_do_tx_login_io(conn, login) < 0) return -1; if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT; login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK; } + break; + } + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + bool state; + + read_lock_bh(&sk->sk_callback_lock); + state = iscsi_target_sk_state_check(sk); + read_unlock_bh(&sk->sk_callback_lock); + + if (!state) { + pr_debug("iscsi_target_do_login() failed state for" + " conn: %p\n", conn); + return -1; + } } return 0; @@ -695,9 +979,17 @@ int iscsi_target_locate_portal( char *tmpbuf, *start = NULL, *end = NULL, *key, *value; struct iscsi_session *sess = conn->sess; struct iscsi_tiqn *tiqn; + struct iscsi_tpg_np *tpg_np = NULL; struct iscsi_login_req *login_req; - u32 payload_length; - int sessiontype = 0, ret = 0; + struct se_node_acl *se_nacl; + u32 payload_length, queue_depth = 0; + int sessiontype = 0, ret = 0, tag_num, tag_size; + + INIT_DELAYED_WORK(&conn->login_work, iscsi_target_do_login_rx); + INIT_DELAYED_WORK(&conn->login_cleanup_work, iscsi_target_do_cleanup); + iscsi_target_set_sock_callbacks(conn); + + login->np = np; login_req = (struct iscsi_login_req *) login->req; payload_length = ntoh24(login_req->dlength); @@ -791,7 +1083,7 @@ int iscsi_target_locate_portal( goto out; } ret = 0; - goto out; + goto alloc_tags; } get_target: @@ -822,7 +1114,7 @@ get_target: /* * Locate Target Portal Group from Storage Node. */ - conn->tpg = iscsit_get_tpg_from_np(tiqn, np); + conn->tpg = iscsit_get_tpg_from_np(tiqn, np, &tpg_np); if (!conn->tpg) { pr_err("Unable to locate Target Portal Group" " on %s\n", tiqn->tiqn); @@ -832,12 +1124,16 @@ get_target: ret = -1; goto out; } + conn->tpg_np = tpg_np; pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt); /* * Setup crc32c modules from libcrypto */ if (iscsi_login_setup_crypto(conn) < 0) { pr_err("iscsi_login_setup_crypto() failed\n"); + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); + iscsit_put_tiqn_for_login(tiqn); + conn->tpg = NULL; ret = -1; goto out; } @@ -846,11 +1142,12 @@ get_target: * process login attempt. */ if (iscsit_access_np(np, conn->tpg) < 0) { + kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); iscsit_put_tiqn_for_login(tiqn); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); - ret = -1; conn->tpg = NULL; + ret = -1; goto out; } @@ -883,8 +1180,27 @@ get_target: ret = -1; goto out; } + se_nacl = sess->se_sess->se_node_acl; + queue_depth = se_nacl->queue_depth; + /* + * Setup pre-allocated tags based upon allowed per NodeACL CmdSN + * depth for non immediate commands, plus extra tags for immediate + * commands. + * + * Also enforce a ISCSIT_MIN_TAGS to prevent unnecessary contention + * in per-cpu-ida tag allocation logic + small queue_depth. + */ +alloc_tags: + tag_num = max_t(u32, ISCSIT_MIN_TAGS, queue_depth); + tag_num += ISCSIT_EXTRA_TAGS; + tag_size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size; - ret = 0; + ret = transport_alloc_session_tags(sess->se_sess, tag_num, tag_size); + if (ret < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + ret = -1; + } out: kfree(tmpbuf); return ret; @@ -897,10 +1213,23 @@ int iscsi_target_start_negotiation( int ret; ret = iscsi_target_do_login(conn, login); - if (ret != 0) + if (!ret) { + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + } else if (ret < 0) { + cancel_delayed_work_sync(&conn->login_work); + cancel_delayed_work_sync(&conn->login_cleanup_work); + iscsi_target_restore_sock_callbacks(conn); iscsi_remove_failed_auth_entry(conn); + } + if (ret != 0) + iscsi_target_nego_release(conn); - iscsi_target_nego_release(conn); return ret; } diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c index 11dc2936af7..93bdc475eb0 100644 --- a/drivers/target/iscsi/iscsi_target_nodeattrib.c +++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the main functions related to Initiator Node Attributes. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 35fd6439eb0..4d2e23fc76f 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains main functions related to iSCSI Parameter negotiation. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -1182,7 +1180,7 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value, unsigned long long tmp; int rc; - rc = strict_strtoull(param->value, 0, &tmp); + rc = kstrtoull(param->value, 0, &tmp); if (rc < 0) return -1; diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c index edb592a368e..ca41b583f2f 100644 --- a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c @@ -2,9 +2,7 @@ * This file contains main functions related to iSCSI DataSequenceInOrder=No * and DataPDUInOrder=No. * - \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c index 464b4206a51..f788e8b5e85 100644 --- a/drivers/target/iscsi/iscsi_target_stat.c +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -2,9 +2,7 @@ * Modern ConfigFS group context specific iSCSI statistics based on original * iscsi_target_mib.c code * - * Copyright (c) 2011 Rising Tide Systems - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * Copyright (c) 2011-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -177,7 +175,7 @@ ISCSI_STAT_INSTANCE_ATTR_RO(description); static ssize_t iscsi_stat_instance_show_attr_vendor( struct iscsi_wwn_stat_grps *igrps, char *page) { - return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n"); + return snprintf(page, PAGE_SIZE, "Datera, Inc. iSCSI-Target\n"); } ISCSI_STAT_INSTANCE_ATTR_RO(vendor); @@ -432,13 +430,7 @@ static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr( int ret; spin_lock(&lstat->lock); - if (lstat->last_intr_fail_ip_family == AF_INET6) { - ret = snprintf(page, PAGE_SIZE, "[%s]\n", - lstat->last_intr_fail_ip_addr); - } else { - ret = snprintf(page, PAGE_SIZE, "%s\n", - lstat->last_intr_fail_ip_addr); - } + ret = snprintf(page, PAGE_SIZE, "%s\n", lstat->last_intr_fail_ip_addr); spin_unlock(&lstat->lock); return ret; diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index b997e5da47d..78404b1cc0b 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target specific Task Management functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 439260b7d87..4faeb47fa5e 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains iSCSI Target Portal Group related functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -49,7 +47,7 @@ struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u1 INIT_LIST_HEAD(&tpg->tpg_gnp_list); INIT_LIST_HEAD(&tpg->tpg_list); mutex_init(&tpg->tpg_access_lock); - mutex_init(&tpg->np_login_lock); + sema_init(&tpg->np_login_sem, 1); spin_lock_init(&tpg->tpg_state_lock); spin_lock_init(&tpg->tpg_np_lock); @@ -129,7 +127,8 @@ void iscsit_release_discovery_tpg(void) struct iscsi_portal_group *iscsit_get_tpg_from_np( struct iscsi_tiqn *tiqn, - struct iscsi_np *np) + struct iscsi_np *np, + struct iscsi_tpg_np **tpg_np_out) { struct iscsi_portal_group *tpg = NULL; struct iscsi_tpg_np *tpg_np; @@ -147,6 +146,8 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( spin_lock(&tpg->tpg_np_lock); list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { if (tpg_np->tpg_np == np) { + *tpg_np_out = tpg_np; + kref_get(&tpg_np->tpg_np_kref); spin_unlock(&tpg->tpg_np_lock); spin_unlock(&tiqn->tiqn_tpg_lock); return tpg; @@ -175,18 +176,20 @@ void iscsit_put_tpg(struct iscsi_portal_group *tpg) static void iscsit_clear_tpg_np_login_thread( struct iscsi_tpg_np *tpg_np, - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { if (!tpg_np->tpg_np) { pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n"); return; } - iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg); + iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown); } void iscsit_clear_tpg_np_login_threads( - struct iscsi_portal_group *tpg) + struct iscsi_portal_group *tpg, + bool shutdown) { struct iscsi_tpg_np *tpg_np; @@ -197,7 +200,7 @@ void iscsit_clear_tpg_np_login_threads( continue; } spin_unlock(&tpg->tpg_np_lock); - iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg, shutdown); spin_lock(&tpg->tpg_np_lock); } spin_unlock(&tpg->tpg_np_lock); @@ -268,6 +271,8 @@ int iscsit_tpg_del_portal_group( tpg->tpg_state = TPG_STATE_INACTIVE; spin_unlock(&tpg->tpg_state_lock); + iscsit_clear_tpg_np_login_threads(tpg, true); + if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { pr_err("Unable to delete iSCSI Target Portal Group:" " %hu while active sessions exist, and force=0\n", @@ -368,7 +373,7 @@ int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force) tpg->tpg_state = TPG_STATE_INACTIVE; spin_unlock(&tpg->tpg_state_lock); - iscsit_clear_tpg_np_login_threads(tpg); + iscsit_clear_tpg_np_login_threads(tpg, false); if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { spin_lock(&tpg->tpg_state_lock); @@ -490,6 +495,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( INIT_LIST_HEAD(&tpg_np->tpg_np_child_list); INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list); spin_lock_init(&tpg_np->tpg_np_parent_lock); + init_completion(&tpg_np->tpg_np_comp); + kref_init(&tpg_np->tpg_np_kref); tpg_np->tpg_np = np; tpg_np->tpg = tpg; @@ -520,7 +527,7 @@ static int iscsit_tpg_release_np( struct iscsi_portal_group *tpg, struct iscsi_np *np) { - iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true); pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n", tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt, diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index dda48c141a8..b77693e2c20 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -5,10 +5,10 @@ extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, extern int iscsit_load_discovery_tpg(void); extern void iscsit_release_discovery_tpg(void); extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *, - struct iscsi_np *); + struct iscsi_np *, struct iscsi_tpg_np **); extern int iscsit_get_tpg(struct iscsi_portal_group *); extern void iscsit_put_tpg(struct iscsi_portal_group *); -extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *); +extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *, bool); extern void iscsit_tpg_dump_params(struct iscsi_portal_group *); extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *); extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *, diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c index 81289520f96..601e9cc61e9 100644 --- a/drivers/target/iscsi/iscsi_target_tq.c +++ b/drivers/target/iscsi/iscsi_target_tq.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Login Thread and Thread Queue functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -105,12 +103,11 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count) ts->status = ISCSI_THREAD_SET_FREE; INIT_LIST_HEAD(&ts->ts_list); spin_lock_init(&ts->ts_state_lock); - init_completion(&ts->rx_post_start_comp); - init_completion(&ts->tx_post_start_comp); init_completion(&ts->rx_restart_comp); init_completion(&ts->tx_restart_comp); init_completion(&ts->rx_start_comp); init_completion(&ts->tx_start_comp); + sema_init(&ts->ts_activate_sem, 0); ts->create_threads = 1; ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s", @@ -139,35 +136,44 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count) return allocated_thread_pair_count; } -void iscsi_deallocate_thread_sets(void) +static void iscsi_deallocate_thread_one(struct iscsi_thread_set *ts) { - u32 released_count = 0; - struct iscsi_thread_set *ts = NULL; - - while ((ts = iscsi_get_ts_from_inactive_list())) { + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_DIE; + if (ts->rx_thread) { + complete(&ts->rx_start_comp); + spin_unlock_bh(&ts->ts_state_lock); + kthread_stop(ts->rx_thread); spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_DIE; + } + if (ts->tx_thread) { + complete(&ts->tx_start_comp); spin_unlock_bh(&ts->ts_state_lock); + kthread_stop(ts->tx_thread); + spin_lock_bh(&ts->ts_state_lock); + } + spin_unlock_bh(&ts->ts_state_lock); + /* + * Release this thread_id in the thread_set_bitmap + */ + spin_lock(&ts_bitmap_lock); + bitmap_release_region(iscsit_global->ts_bitmap, + ts->thread_id, get_order(1)); + spin_unlock(&ts_bitmap_lock); - if (ts->rx_thread) { - send_sig(SIGINT, ts->rx_thread, 1); - kthread_stop(ts->rx_thread); - } - if (ts->tx_thread) { - send_sig(SIGINT, ts->tx_thread, 1); - kthread_stop(ts->tx_thread); - } - /* - * Release this thread_id in the thread_set_bitmap - */ - spin_lock(&ts_bitmap_lock); - bitmap_release_region(iscsit_global->ts_bitmap, - ts->thread_id, get_order(1)); - spin_unlock(&ts_bitmap_lock); + kfree(ts); +} +void iscsi_deallocate_thread_sets(void) +{ + struct iscsi_thread_set *ts = NULL; + u32 released_count = 0; + + while ((ts = iscsi_get_ts_from_inactive_list())) { + + iscsi_deallocate_thread_one(ts); released_count++; - kfree(ts); } if (released_count) @@ -187,34 +193,13 @@ static void iscsi_deallocate_extra_thread_sets(void) if (!ts) break; - spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_DIE; - spin_unlock_bh(&ts->ts_state_lock); - - if (ts->rx_thread) { - send_sig(SIGINT, ts->rx_thread, 1); - kthread_stop(ts->rx_thread); - } - if (ts->tx_thread) { - send_sig(SIGINT, ts->tx_thread, 1); - kthread_stop(ts->tx_thread); - } - /* - * Release this thread_id in the thread_set_bitmap - */ - spin_lock(&ts_bitmap_lock); - bitmap_release_region(iscsit_global->ts_bitmap, - ts->thread_id, get_order(1)); - spin_unlock(&ts_bitmap_lock); - + iscsi_deallocate_thread_one(ts); released_count++; - kfree(ts); } - if (released_count) { + if (released_count) pr_debug("Stopped %d thread set(s) (%d total threads)." "\n", released_count, released_count * 2); - } } void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts) @@ -224,37 +209,23 @@ void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set spin_lock_bh(&ts->ts_state_lock); conn->thread_set = ts; ts->conn = conn; + ts->status = ISCSI_THREAD_SET_ACTIVE; spin_unlock_bh(&ts->ts_state_lock); - /* - * Start up the RX thread and wait on rx_post_start_comp. The RX - * Thread will then do the same for the TX Thread in - * iscsi_rx_thread_pre_handler(). - */ + complete(&ts->rx_start_comp); - wait_for_completion(&ts->rx_post_start_comp); + complete(&ts->tx_start_comp); + + down(&ts->ts_activate_sem); } struct iscsi_thread_set *iscsi_get_thread_set(void) { - int allocate_ts = 0; - struct completion comp; - struct iscsi_thread_set *ts = NULL; - /* - * If no inactive thread set is available on the first call to - * iscsi_get_ts_from_inactive_list(), sleep for a second and - * try again. If still none are available after two attempts, - * allocate a set ourselves. - */ + struct iscsi_thread_set *ts; + get_set: ts = iscsi_get_ts_from_inactive_list(); if (!ts) { - if (allocate_ts == 2) - iscsi_allocate_thread_sets(1); - - init_completion(&comp); - wait_for_completion_timeout(&comp, 1 * HZ); - - allocate_ts++; + iscsi_allocate_thread_sets(1); goto get_set; } @@ -263,6 +234,7 @@ get_set: ts->thread_count = 2; init_completion(&ts->rx_restart_comp); init_completion(&ts->tx_restart_comp); + sema_init(&ts->ts_activate_sem, 0); return ts; } @@ -400,7 +372,8 @@ static void iscsi_check_to_add_additional_sets(void) static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts) { spin_lock_bh(&ts->ts_state_lock); - if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) { + if (ts->status == ISCSI_THREAD_SET_DIE || kthread_should_stop() || + signal_pending(current)) { spin_unlock_bh(&ts->ts_state_lock); return -1; } @@ -419,7 +392,8 @@ struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts) goto sleep; } - flush_signals(current); + if (ts->status != ISCSI_THREAD_SET_DIE) + flush_signals(current); if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); @@ -446,18 +420,19 @@ sleep: if (iscsi_signal_thread_pre_handler(ts) < 0) return NULL; + iscsi_check_to_add_additional_sets(); + + spin_lock_bh(&ts->ts_state_lock); if (!ts->conn) { pr_err("struct iscsi_thread_set->conn is NULL for" - " thread_id: %d, going back to sleep\n", ts->thread_id); - goto sleep; + " RX thread_id: %s/%d\n", current->comm, current->pid); + spin_unlock_bh(&ts->ts_state_lock); + return NULL; } - iscsi_check_to_add_additional_sets(); - /* - * The RX Thread starts up the TX Thread and sleeps. - */ ts->thread_clear |= ISCSI_CLEAR_RX_THREAD; - complete(&ts->tx_start_comp); - wait_for_completion(&ts->tx_post_start_comp); + spin_unlock_bh(&ts->ts_state_lock); + + up(&ts->ts_activate_sem); return ts->conn; } @@ -472,7 +447,8 @@ struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts) goto sleep; } - flush_signals(current); + if (ts->status != ISCSI_THREAD_SET_DIE) + flush_signals(current); if (ts->delay_inactive && (--ts->thread_count == 0)) { spin_unlock_bh(&ts->ts_state_lock); @@ -498,27 +474,20 @@ sleep: if (iscsi_signal_thread_pre_handler(ts) < 0) return NULL; - if (!ts->conn) { - pr_err("struct iscsi_thread_set->conn is NULL for " - " thread_id: %d, going back to sleep\n", - ts->thread_id); - goto sleep; - } - iscsi_check_to_add_additional_sets(); - /* - * From the TX thread, up the tx_post_start_comp that the RX Thread is - * sleeping on in iscsi_rx_thread_pre_handler(), then up the - * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on. - */ - ts->thread_clear |= ISCSI_CLEAR_TX_THREAD; - complete(&ts->tx_post_start_comp); - complete(&ts->rx_post_start_comp); spin_lock_bh(&ts->ts_state_lock); - ts->status = ISCSI_THREAD_SET_ACTIVE; + if (!ts->conn) { + pr_err("struct iscsi_thread_set->conn is NULL for" + " TX thread_id: %s/%d\n", current->comm, current->pid); + spin_unlock_bh(&ts->ts_state_lock); + return NULL; + } + ts->thread_clear |= ISCSI_CLEAR_TX_THREAD; spin_unlock_bh(&ts->ts_state_lock); + up(&ts->ts_activate_sem); + return ts->conn; } diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h index 547d1183128..cc1eede5ab3 100644 --- a/drivers/target/iscsi/iscsi_target_tq.h +++ b/drivers/target/iscsi/iscsi_target_tq.h @@ -64,10 +64,6 @@ struct iscsi_thread_set { struct iscsi_conn *conn; /* used for controlling ts state accesses */ spinlock_t ts_state_lock; - /* Used for rx side post startup */ - struct completion rx_post_start_comp; - /* Used for tx side post startup */ - struct completion tx_post_start_comp; /* used for restarting thread queue */ struct completion rx_restart_comp; /* used for restarting thread queue */ @@ -82,6 +78,7 @@ struct iscsi_thread_set { struct task_struct *tx_thread; /* struct iscsi_thread_set in list list head*/ struct list_head ts_list; + struct semaphore ts_activate_sem; }; #endif /*** ISCSI_THREAD_QUEUE_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 1df06d5e4e0..f2de28e178f 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1,9 +1,7 @@ /******************************************************************************* * This file contains the iSCSI Target specific utility functions. * - * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. - * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * (c) Copyright 2007-2013 Datera, Inc. * * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -19,6 +17,7 @@ ******************************************************************************/ #include <linux/list.h> +#include <linux/percpu_ida.h> #include <scsi/scsi_tcq.h> #include <scsi/iscsi_proto.h> #include <target/target_core_base.h> @@ -149,18 +148,6 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) spin_unlock_bh(&cmd->r2t_lock); } -struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) -{ - struct iscsi_cmd *cmd; - - cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask); - if (!cmd) - return NULL; - - cmd->release_cmd = &iscsit_release_cmd; - return cmd; -} - /* * May be called from software interrupt (timer) context for allocating * iSCSI NopINs. @@ -168,12 +155,15 @@ struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) { struct iscsi_cmd *cmd; + struct se_session *se_sess = conn->sess->se_sess; + int size, tag; - cmd = conn->conn_transport->iscsit_alloc_cmd(conn, gfp_mask); - if (!cmd) { - pr_err("Unable to allocate memory for struct iscsi_cmd.\n"); - return NULL; - } + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, gfp_mask); + size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size; + cmd = (struct iscsi_cmd *)(se_sess->sess_cmd_map + (tag * size)); + memset(cmd, 0, size); + + cmd->se_cmd.map_tag = tag; cmd->conn = conn; INIT_LIST_HEAD(&cmd->i_conn_node); INIT_LIST_HEAD(&cmd->datain_list); @@ -689,6 +679,16 @@ void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn) void iscsit_release_cmd(struct iscsi_cmd *cmd) { + struct iscsi_session *sess; + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (cmd->conn) + sess = cmd->conn->sess; + else + sess = cmd->sess; + + BUG_ON(!sess || !sess->se_sess); + kfree(cmd->buf_ptr); kfree(cmd->pdu_list); kfree(cmd->seq_list); @@ -696,8 +696,9 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd) kfree(cmd->iov_data); kfree(cmd->text_in_ptr); - kmem_cache_free(lio_cmd_cache, cmd); + percpu_ida_free(&sess->se_sess->sess_tag_pool, se_cmd->map_tag); } +EXPORT_SYMBOL(iscsit_release_cmd); static void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd, bool check_queues) @@ -761,7 +762,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) /* Fall-through */ default: __iscsit_free_cmd(cmd, false, shutdown); - cmd->release_cmd(cmd); + iscsit_release_cmd(cmd); break; } } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 568ad25f25d..0f6d69dabca 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -3,7 +3,7 @@ * This file contains the Linux/SCSI LLD virtual SCSI initiator driver * for emulated SAS initiator ports * - * © Copyright 2011 RisingTide Systems LLC. + * © Copyright 2011-2013 Datera, Inc. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index cbe48ab4174..47244102281 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -3,7 +3,7 @@ * * This file contains SPC-3 compliant asymmetric logical unit assigntment (ALUA) * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -557,6 +557,9 @@ target_alua_state_check(struct se_cmd *cmd) * a ALUA logical unit group. */ tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem; + if (!tg_pt_gp_mem) + return 0; + spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock); tg_pt_gp = tg_pt_gp_mem->tg_pt_gp; out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); @@ -730,7 +733,7 @@ static int core_alua_write_tpg_metadata( if (ret < 0) pr_err("Error writing ALUA metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? -EIO : 0; } /* @@ -1756,10 +1759,10 @@ ssize_t core_alua_store_access_type( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_access_type\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1) && (tmp != 2) && (tmp != 3)) { pr_err("Illegal value for alua_access_type:" @@ -1794,10 +1797,10 @@ ssize_t core_alua_store_nonop_delay_msecs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract nonop_delay_msecs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_NONOP_DELAY_MSECS) { pr_err("Passed nonop_delay_msecs: %lu, exceeds" @@ -1825,10 +1828,10 @@ ssize_t core_alua_store_trans_delay_msecs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract trans_delay_msecs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_TRANS_DELAY_MSECS) { pr_err("Passed trans_delay_msecs: %lu, exceeds" @@ -1856,10 +1859,10 @@ ssize_t core_alua_store_implict_trans_secs( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract implict_trans_secs\n"); - return -EINVAL; + return ret; } if (tmp > ALUA_MAX_IMPLICT_TRANS_SECS) { pr_err("Passed implict_trans_secs: %lu, exceeds" @@ -1887,10 +1890,10 @@ ssize_t core_alua_store_preferred_bit( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract preferred ALUA value\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for preferred ALUA: %lu\n", tmp); @@ -1922,10 +1925,10 @@ ssize_t core_alua_store_offline_bit( if (!lun->lun_sep) return -ENODEV; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_offline value\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for alua_tg_pt_offline: %lu\n", @@ -1961,10 +1964,10 @@ ssize_t core_alua_store_secondary_status( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_status\n"); - return -EINVAL; + return ret; } if ((tmp != ALUA_STATUS_NONE) && (tmp != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) && @@ -1994,10 +1997,10 @@ ssize_t core_alua_store_secondary_write_metadata( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_tg_pt_write_md\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { pr_err("Illegal value for alua_tg_pt_write_md:" diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index e4d22933efa..82e81c542e4 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3,7 +3,7 @@ * * This file contains ConfigFS logic for the Generic Target Engine project. * - * (c) Copyright 2008-2012 RisingTide Systems LLC. + * (c) Copyright 2008-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -48,6 +48,7 @@ #include "target_core_alua.h" #include "target_core_pr.h" #include "target_core_rd.h" +#include "target_core_xcopy.h" extern struct t10_alua_lu_gp *default_lu_gp; @@ -268,7 +269,7 @@ static struct configfs_subsystem target_core_fabrics = { }, }; -static struct configfs_subsystem *target_core_subsystem[] = { +struct configfs_subsystem *target_core_subsystem[] = { &target_core_fabrics, NULL, }; @@ -577,9 +578,9 @@ static ssize_t target_core_dev_store_attr_##_name( \ unsigned long val; \ int ret; \ \ - ret = strict_strtoul(page, 0, &val); \ + ret = kstrtoul(page, 0, &val); \ if (ret < 0) { \ - pr_err("strict_strtoul() failed with" \ + pr_err("kstrtoul() failed with" \ " ret: %d\n", ret); \ return -EINVAL; \ } \ @@ -636,6 +637,12 @@ SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR); DEF_DEV_ATTRIB(emulate_tpws); SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR); +DEF_DEV_ATTRIB(emulate_caw); +SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR); + +DEF_DEV_ATTRIB(emulate_3pc); +SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR); + DEF_DEV_ATTRIB(enforce_pr_isids); SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR); @@ -693,6 +700,8 @@ static struct configfs_attribute *target_core_dev_attrib_attrs[] = { &target_core_dev_attrib_emulate_tas.attr, &target_core_dev_attrib_emulate_tpu.attr, &target_core_dev_attrib_emulate_tpws.attr, + &target_core_dev_attrib_emulate_caw.attr, + &target_core_dev_attrib_emulate_3pc.attr, &target_core_dev_attrib_enforce_pr_isids.attr, &target_core_dev_attrib_is_nonrot.attr, &target_core_dev_attrib_emulate_rest_reord.attr, @@ -1310,9 +1319,9 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata( ret = -ENOMEM; goto out; } - ret = strict_strtoull(arg_p, 0, &tmp_ll); + ret = kstrtoull(arg_p, 0, &tmp_ll); if (ret < 0) { - pr_err("strict_strtoull() failed for" + pr_err("kstrtoull() failed for" " sa_res_key=\n"); goto out; } @@ -1836,11 +1845,11 @@ static ssize_t target_core_alua_lu_gp_store_attr_lu_gp_id( unsigned long lu_gp_id; int ret; - ret = strict_strtoul(page, 0, &lu_gp_id); + ret = kstrtoul(page, 0, &lu_gp_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " lu_gp_id\n", ret); - return -EINVAL; + return ret; } if (lu_gp_id > 0x0000ffff) { pr_err("ALUA lu_gp_id: %lu exceeds maximum:" @@ -2032,11 +2041,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( return -EINVAL; } - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract new ALUA access state from" " %s\n", page); - return -EINVAL; + return ret; } new_state = (int)tmp; @@ -2079,11 +2088,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_status( return -EINVAL; } - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract new ALUA access status" " from %s\n", page); - return -EINVAL; + return ret; } new_status = (int)tmp; @@ -2139,10 +2148,10 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_write_metadata( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { pr_err("Unable to extract alua_write_metadata\n"); - return -EINVAL; + return ret; } if ((tmp != 0) && (tmp != 1)) { @@ -2263,11 +2272,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_tg_pt_gp_id( unsigned long tg_pt_gp_id; int ret; - ret = strict_strtoul(page, 0, &tg_pt_gp_id); + ret = kstrtoul(page, 0, &tg_pt_gp_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " tg_pt_gp_id\n", ret); - return -EINVAL; + return ret; } if (tg_pt_gp_id > 0x0000ffff) { pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum:" @@ -2676,10 +2685,10 @@ static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba, if (transport->pmode_enable_hba == NULL) return -EINVAL; - ret = strict_strtoul(page, 0, &mode_flag); + ret = kstrtoul(page, 0, &mode_flag); if (ret < 0) { pr_err("Unable to extract hba mode flag: %d\n", ret); - return -EINVAL; + return ret; } if (hba->dev_count) { @@ -2767,11 +2776,11 @@ static struct config_group *target_core_call_addhbatotarget( str++; /* Skip to start of plugin dependent ID */ } - ret = strict_strtoul(str, 0, &plugin_dep_id); + ret = kstrtoul(str, 0, &plugin_dep_id); if (ret < 0) { - pr_err("strict_strtoul() returned %d for" + pr_err("kstrtoul() returned %d for" " plugin_dep_id\n", ret); - return ERR_PTR(-EINVAL); + return ERR_PTR(ret); } /* * Load up TCM subsystem plugins if they have not already been loaded. @@ -2927,6 +2936,10 @@ static int __init target_core_init_configfs(void) if (ret < 0) goto out; + ret = target_xcopy_setup_pt(); + if (ret < 0) + goto out; + return 0; out: @@ -2999,6 +3012,7 @@ static void __exit target_core_exit_configfs(void) core_dev_release_virtual_lun0(); rd_module_exit(); + target_xcopy_release_pt(); release_se_kmem_caches(); } diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 8f4142fe5f1..d90dbb0f1a6 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -4,7 +4,7 @@ * This file contains the TCM Virtual Device and Disk Transport * agnostic related functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -47,6 +47,9 @@ #include "target_core_pr.h" #include "target_core_ua.h" +DEFINE_MUTEX(g_device_mutex); +LIST_HEAD(g_device_list); + static struct se_hba *lun0_hba; /* not static, needed by tpg.c */ struct se_device *g_lun0_dev; @@ -890,6 +893,32 @@ int se_dev_set_emulate_tpws(struct se_device *dev, int flag) return 0; } +int se_dev_set_emulate_caw(struct se_device *dev, int flag) +{ + if (flag != 0 && flag != 1) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + dev->dev_attrib.emulate_caw = flag; + pr_debug("dev[%p]: SE Device CompareAndWrite (AtomicTestandSet): %d\n", + dev, flag); + + return 0; +} + +int se_dev_set_emulate_3pc(struct se_device *dev, int flag) +{ + if (flag != 0 && flag != 1) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + dev->dev_attrib.emulate_3pc = flag; + pr_debug("dev[%p]: SE Device 3rd Party Copy (EXTENDED_COPY): %d\n", + dev, flag); + + return 0; +} + int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) { if ((flag != 0) && (flag != 1)) { @@ -1393,6 +1422,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->delayed_cmd_list); INIT_LIST_HEAD(&dev->state_list); INIT_LIST_HEAD(&dev->qf_cmd_list); + INIT_LIST_HEAD(&dev->g_dev_node); spin_lock_init(&dev->stats_lock); spin_lock_init(&dev->execute_task_lock); spin_lock_init(&dev->delayed_cmd_lock); @@ -1400,6 +1430,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) spin_lock_init(&dev->se_port_lock); spin_lock_init(&dev->se_tmr_lock); spin_lock_init(&dev->qf_cmd_lock); + sema_init(&dev->caw_sem, 1); atomic_set(&dev->dev_ordered_id, 0); INIT_LIST_HEAD(&dev->t10_wwn.t10_vpd_list); spin_lock_init(&dev->t10_wwn.t10_vpd_lock); @@ -1423,6 +1454,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.emulate_tas = DA_EMULATE_TAS; dev->dev_attrib.emulate_tpu = DA_EMULATE_TPU; dev->dev_attrib.emulate_tpws = DA_EMULATE_TPWS; + dev->dev_attrib.emulate_caw = DA_EMULATE_CAW; + dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC; dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS; dev->dev_attrib.is_nonrot = DA_IS_NONROT; dev->dev_attrib.emulate_rest_reord = DA_EMULATE_REST_REORD; @@ -1510,6 +1543,11 @@ int target_configure_device(struct se_device *dev) spin_lock(&hba->device_lock); hba->dev_count++; spin_unlock(&hba->device_lock); + + mutex_lock(&g_device_mutex); + list_add_tail(&dev->g_dev_node, &g_device_list); + mutex_unlock(&g_device_mutex); + return 0; out_free_alua: @@ -1528,6 +1566,10 @@ void target_free_device(struct se_device *dev) if (dev->dev_flags & DF_CONFIGURED) { destroy_workqueue(dev->tmr_wq); + mutex_lock(&g_device_mutex); + list_del(&dev->g_dev_node); + mutex_unlock(&g_device_mutex); + spin_lock(&hba->device_lock); hba->dev_count--; spin_unlock(&hba->device_lock); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index eb56eb12956..3503996d7d1 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -4,7 +4,7 @@ * This file contains generic fabric module configfs infrastructure for * TCM v4.x code * - * (c) Copyright 2010-2012 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * @@ -189,9 +189,11 @@ static ssize_t target_fabric_mappedlun_store_write_protect( struct se_node_acl *se_nacl = lacl->se_lun_nacl; struct se_portal_group *se_tpg = se_nacl->se_tpg; unsigned long op; + int ret; - if (strict_strtoul(page, 0, &op)) - return -EINVAL; + ret = kstrtoul(page, 0, &op); + if (ret) + return ret; if ((op != 1) && (op != 0)) return -EINVAL; @@ -350,7 +352,10 @@ static struct config_group *target_fabric_make_mappedlun( * Determine the Mapped LUN value. This is what the SCSI Initiator * Port will actually see. */ - if (strict_strtoul(buf + 4, 0, &mapped_lun) || mapped_lun > UINT_MAX) { + ret = kstrtoul(buf + 4, 0, &mapped_lun); + if (ret) + goto out; + if (mapped_lun > UINT_MAX) { ret = -EINVAL; goto out; } @@ -875,7 +880,10 @@ static struct config_group *target_fabric_make_lun( " \"lun_$LUN_NUMBER\"\n"); return ERR_PTR(-EINVAL); } - if (strict_strtoul(name + 4, 0, &unpacked_lun) || unpacked_lun > UINT_MAX) + errno = kstrtoul(name + 4, 0, &unpacked_lun); + if (errno) + return ERR_PTR(errno); + if (unpacked_lun > UINT_MAX) return ERR_PTR(-EINVAL); lun = core_get_lun_from_tpg(se_tpg, unpacked_lun); diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 687b0b0a4aa..0d1cf8b4f49 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -4,7 +4,7 @@ * This file contains generic high level protocol identifier and PR * handlers for TCM fabric modules * - * (c) Copyright 2010-2012 RisingTide Systems LLC. + * (c) Copyright 2010-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b11890d8512..b662f89deda 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -3,7 +3,7 @@ * * This file contains the Storage Engine <-> FILEIO transport specific functions * - * (c) Copyright 2005-2012 RisingTide Systems LLC. + * (c) Copyright 2005-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -547,11 +547,9 @@ fd_execute_unmap(struct se_cmd *cmd) } static sense_reason_t -fd_execute_rw(struct se_cmd *cmd) +fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *dev = cmd->se_dev; int ret = 0; @@ -635,10 +633,10 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev, ret = -ENOMEM; break; } - ret = strict_strtoull(arg_p, 0, &fd_dev->fd_dev_size); + ret = kstrtoull(arg_p, 0, &fd_dev->fd_dev_size); kfree(arg_p); if (ret < 0) { - pr_err("strict_strtoull() failed for" + pr_err("kstrtoull() failed for" " fd_dev_size=\n"); goto out; } diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index d2616cd48f1..a25051a37dd 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -3,7 +3,7 @@ * * This file contains the TCM HBA Transport related functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index aa1620abec6..b9a3394fe47 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -4,7 +4,7 @@ * This file contains the Storage Engine <-> Linux BlockIO transport * specific functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -536,10 +536,10 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev, ret = -ENOMEM; break; } - ret = strict_strtoul(arg_p, 0, &tmp_readonly); + ret = kstrtoul(arg_p, 0, &tmp_readonly); kfree(arg_p); if (ret < 0) { - pr_err("strict_strtoul() failed for" + pr_err("kstrtoul() failed for" " readonly=\n"); goto out; } @@ -587,11 +587,9 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) } static sense_reason_t -iblock_execute_rw(struct se_cmd *cmd) +iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *dev = cmd->se_dev; struct iblock_req *ibr; struct bio *bio; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 18d49df4d0a..579128abe3f 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -33,6 +33,8 @@ int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int); int se_dev_set_emulate_tas(struct se_device *, int); int se_dev_set_emulate_tpu(struct se_device *, int); int se_dev_set_emulate_tpws(struct se_device *, int); +int se_dev_set_emulate_caw(struct se_device *, int); +int se_dev_set_emulate_3pc(struct se_device *, int); int se_dev_set_enforce_pr_isids(struct se_device *, int); int se_dev_set_is_nonrot(struct se_device *, int); int se_dev_set_emulate_rest_reord(struct se_device *dev, int); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index bd78faf67c6..d1ae4c5c3ff 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -4,7 +4,7 @@ * This file contains SPC-3 compliant persistent reservations and * legacy SPC-2 reservations with compatible reservation handling (CRH=1) * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -1949,7 +1949,7 @@ static int __core_scsi3_write_aptpl_to_file( pr_debug("Error writing APTPL metadata file: %s\n", path); fput(file); - return ret ? -EIO : 0; + return (ret < 0) ? -EIO : 0; } /* diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e992b27aa09..551c96ca60a 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -3,7 +3,7 @@ * * This file contains the generic target mode <-> Linux SCSI subsystem plugin. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -1050,9 +1050,8 @@ pscsi_execute_cmd(struct se_cmd *cmd) req = blk_get_request(pdv->pdv_sd->request_queue, (data_direction == DMA_TO_DEVICE), GFP_KERNEL); - if (!req || IS_ERR(req)) { - pr_err("PSCSI: blk_get_request() failed: %ld\n", - req ? IS_ERR(req) : -ENOMEM); + if (!req) { + pr_err("PSCSI: blk_get_request() failed\n"); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto fail; } diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 51127d15d5c..131327ac7f5 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -4,7 +4,7 @@ * This file contains the Storage Engine <-> Ramdisk transport * specific functions. * - * (c) Copyright 2003-2012 RisingTide Systems LLC. + * (c) Copyright 2003-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -280,11 +280,9 @@ static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page) } static sense_reason_t -rd_execute_rw(struct se_cmd *cmd) +rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, + enum dma_data_direction data_direction) { - struct scatterlist *sgl = cmd->t_data_sg; - u32 sgl_nents = cmd->t_data_nents; - enum dma_data_direction data_direction = cmd->data_direction; struct se_device *se_dev = cmd->se_dev; struct rd_dev *dev = RD_DEV(se_dev); struct rd_dev_sg_table *table; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 8a462773d0c..6c17295e8d7 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1,7 +1,7 @@ /* * SCSI Block Commands (SBC) parsing and emulation. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -25,6 +25,7 @@ #include <linux/ratelimit.h> #include <asm/unaligned.h> #include <scsi/scsi.h> +#include <scsi/scsi_tcq.h> #include <target/target_core_base.h> #include <target/target_core_backend.h> @@ -280,13 +281,13 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o return 0; } -static void xdreadwrite_callback(struct se_cmd *cmd) +static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd) { unsigned char *buf, *addr; struct scatterlist *sg; unsigned int offset; - int i; - int count; + sense_reason_t ret = TCM_NO_SENSE; + int i, count; /* * From sbc3r22.pdf section 5.48 XDWRITEREAD (10) command * @@ -301,7 +302,7 @@ static void xdreadwrite_callback(struct se_cmd *cmd) buf = kmalloc(cmd->data_length, GFP_KERNEL); if (!buf) { pr_err("Unable to allocate xor_callback buf\n"); - return; + return TCM_OUT_OF_RESOURCES; } /* * Copy the scatterlist WRITE buffer located at cmd->t_data_sg @@ -320,8 +321,10 @@ static void xdreadwrite_callback(struct se_cmd *cmd) offset = 0; for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, count) { addr = kmap_atomic(sg_page(sg)); - if (!addr) + if (!addr) { + ret = TCM_OUT_OF_RESOURCES; goto out; + } for (i = 0; i < sg->length; i++) *(addr + sg->offset + i) ^= *(buf + offset + i); @@ -332,6 +335,193 @@ static void xdreadwrite_callback(struct se_cmd *cmd) out: kfree(buf); + return ret; +} + +static sense_reason_t +sbc_execute_rw(struct se_cmd *cmd) +{ + return cmd->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents, + cmd->data_direction); +} + +static sense_reason_t compare_and_write_post(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + + cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST; + /* + * Unlock ->caw_sem originally obtained during sbc_compare_and_write() + * before the original READ I/O submission. + */ + up(&dev->caw_sem); + + return TCM_NO_SENSE; +} + +static sense_reason_t compare_and_write_callback(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + struct scatterlist *write_sg = NULL, *sg; + unsigned char *buf, *addr; + struct sg_mapping_iter m; + unsigned int offset = 0, len; + unsigned int nlbas = cmd->t_task_nolb; + unsigned int block_size = dev->dev_attrib.block_size; + unsigned int compare_len = (nlbas * block_size); + sense_reason_t ret = TCM_NO_SENSE; + int rc, i; + + /* + * Handle early failure in transport_generic_request_failure(), + * which will not have taken ->caw_mutex yet.. + */ + if (!cmd->t_data_sg || !cmd->t_bidi_data_sg) + return TCM_NO_SENSE; + + buf = kzalloc(cmd->data_length, GFP_KERNEL); + if (!buf) { + pr_err("Unable to allocate compare_and_write buf\n"); + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + + write_sg = kzalloc(sizeof(struct scatterlist) * cmd->t_data_nents, + GFP_KERNEL); + if (!write_sg) { + pr_err("Unable to allocate compare_and_write sg\n"); + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + /* + * Setup verify and write data payloads from total NumberLBAs. + */ + rc = sg_copy_to_buffer(cmd->t_data_sg, cmd->t_data_nents, buf, + cmd->data_length); + if (!rc) { + pr_err("sg_copy_to_buffer() failed for compare_and_write\n"); + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + /* + * Compare against SCSI READ payload against verify payload + */ + for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, i) { + addr = (unsigned char *)kmap_atomic(sg_page(sg)); + if (!addr) { + ret = TCM_OUT_OF_RESOURCES; + goto out; + } + + len = min(sg->length, compare_len); + + if (memcmp(addr, buf + offset, len)) { + pr_warn("Detected MISCOMPARE for addr: %p buf: %p\n", + addr, buf + offset); + kunmap_atomic(addr); + goto miscompare; + } + kunmap_atomic(addr); + + offset += len; + compare_len -= len; + if (!compare_len) + break; + } + + i = 0; + len = cmd->t_task_nolb * block_size; + sg_miter_start(&m, cmd->t_data_sg, cmd->t_data_nents, SG_MITER_TO_SG); + /* + * Currently assumes NoLB=1 and SGLs are PAGE_SIZE.. + */ + while (len) { + sg_miter_next(&m); + + if (block_size < PAGE_SIZE) { + sg_set_page(&write_sg[i], m.page, block_size, + block_size); + } else { + sg_miter_next(&m); + sg_set_page(&write_sg[i], m.page, block_size, + 0); + } + len -= block_size; + i++; + } + sg_miter_stop(&m); + /* + * Save the original SGL + nents values before updating to new + * assignments, to be released in transport_free_pages() -> + * transport_reset_sgl_orig() + */ + cmd->t_data_sg_orig = cmd->t_data_sg; + cmd->t_data_sg = write_sg; + cmd->t_data_nents_orig = cmd->t_data_nents; + cmd->t_data_nents = 1; + + cmd->sam_task_attr = MSG_HEAD_TAG; + cmd->transport_complete_callback = compare_and_write_post; + /* + * Now reset ->execute_cmd() to the normal sbc_execute_rw() handler + * for submitting the adjusted SGL to write instance user-data. + */ + cmd->execute_cmd = sbc_execute_rw; + + spin_lock_irq(&cmd->t_state_lock); + cmd->t_state = TRANSPORT_PROCESSING; + cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; + spin_unlock_irq(&cmd->t_state_lock); + + __target_execute_cmd(cmd); + + kfree(buf); + return ret; + +miscompare: + pr_warn("Target/%s: Send MISCOMPARE check condition and sense\n", + dev->transport->name); + ret = TCM_MISCOMPARE_VERIFY; +out: + /* + * In the MISCOMPARE or failure case, unlock ->caw_sem obtained in + * sbc_compare_and_write() before the original READ I/O submission. + */ + up(&dev->caw_sem); + kfree(write_sg); + kfree(buf); + return ret; +} + +static sense_reason_t +sbc_compare_and_write(struct se_cmd *cmd) +{ + struct se_device *dev = cmd->se_dev; + sense_reason_t ret; + int rc; + /* + * Submit the READ first for COMPARE_AND_WRITE to perform the + * comparision using SGLs at cmd->t_bidi_data_sg.. + */ + rc = down_interruptible(&dev->caw_sem); + if ((rc != 0) || signal_pending(current)) { + cmd->transport_complete_callback = NULL; + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + + ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents, + DMA_FROM_DEVICE); + if (ret) { + cmd->transport_complete_callback = NULL; + up(&dev->caw_sem); + return ret; + } + /* + * Unlock of dev->caw_sem to occur in compare_and_write_callback() + * upon MISCOMPARE, or in compare_and_write_done() upon completion + * of WRITE instance user-data. + */ + return TCM_NO_SENSE; } sense_reason_t @@ -348,31 +538,36 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case READ_10: sectors = transport_get_sectors_10(cdb); cmd->t_task_lba = transport_lba_32(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case READ_12: sectors = transport_get_sectors_12(cdb); cmd->t_task_lba = transport_lba_32(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case READ_16: sectors = transport_get_sectors_16(cdb); cmd->t_task_lba = transport_lba_64(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case WRITE_6: sectors = transport_get_sectors_6(cdb); cmd->t_task_lba = transport_lba_21(cdb); cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case WRITE_10: case WRITE_VERIFY: @@ -381,7 +576,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case WRITE_12: sectors = transport_get_sectors_12(cdb); @@ -389,7 +585,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case WRITE_16: sectors = transport_get_sectors_16(cdb); @@ -397,7 +594,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB; - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; break; case XDWRITEREAD_10: if (cmd->data_direction != DMA_TO_DEVICE || @@ -411,7 +609,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) /* * Setup BIDI XOR callback to be run after I/O completion. */ - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; @@ -434,7 +633,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Setup BIDI XOR callback to be run during after I/O * completion. */ - cmd->execute_cmd = ops->execute_rw; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_execute_rw; cmd->transport_complete_callback = &xdreadwrite_callback; if (cdb[1] & 0x8) cmd->se_cmd_flags |= SCF_FUA; @@ -461,6 +661,28 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) } break; } + case COMPARE_AND_WRITE: + sectors = cdb[13]; + /* + * Currently enforce COMPARE_AND_WRITE for a single sector + */ + if (sectors > 1) { + pr_err("COMPARE_AND_WRITE contains NoLB: %u greater" + " than 1\n", sectors); + return TCM_INVALID_CDB_FIELD; + } + /* + * Double size because we have two buffers, note that + * zero is not an error.. + */ + size = 2 * sbc_get_size(cmd, sectors); + cmd->t_task_lba = get_unaligned_be64(&cdb[2]); + cmd->t_task_nolb = sectors; + cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE; + cmd->execute_rw = ops->execute_rw; + cmd->execute_cmd = sbc_compare_and_write; + cmd->transport_complete_callback = compare_and_write_callback; + break; case READ_CAPACITY: size = READ_CAP_LEN; cmd->execute_cmd = sbc_emulate_readcapacity; @@ -600,7 +822,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return TCM_ADDRESS_OUT_OF_RANGE; } - size = sbc_get_size(cmd, sectors); + if (!(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) + size = sbc_get_size(cmd, sectors); } return target_cmd_size_check(cmd, size); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 9fabbf7214c..074539558a5 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1,7 +1,7 @@ /* * SCSI Primary Commands (SPC) parsing and emulation. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -35,7 +35,7 @@ #include "target_core_alua.h" #include "target_core_pr.h" #include "target_core_ua.h" - +#include "target_core_xcopy.h" static void spc_fill_alua_data(struct se_port *port, unsigned char *buf) { @@ -95,6 +95,12 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf) */ spc_fill_alua_data(lun->lun_sep, buf); + /* + * Set Third-Party Copy (3PC) bit to indicate support for EXTENDED_COPY + */ + if (dev->dev_attrib.emulate_3pc) + buf[5] |= 0x8; + buf[7] = 0x2; /* CmdQue=1 */ memcpy(&buf[8], "LIO-ORG ", 8); @@ -129,8 +135,8 @@ spc_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf) return 0; } -static void spc_parse_naa_6h_vendor_specific(struct se_device *dev, - unsigned char *buf) +void spc_parse_naa_6h_vendor_specific(struct se_device *dev, + unsigned char *buf) { unsigned char *p = &dev->t10_wwn.unit_serial[0]; int cnt; @@ -460,6 +466,11 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* Set WSNZ to 1 */ buf[4] = 0x01; + /* + * Set MAXIMUM COMPARE AND WRITE LENGTH + */ + if (dev->dev_attrib.emulate_caw) + buf[5] = 0x01; /* * Set OPTIMAL TRANSFER LENGTH GRANULARITY @@ -1250,8 +1261,14 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) *size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9]; break; case EXTENDED_COPY: - case READ_ATTRIBUTE: + *size = get_unaligned_be32(&cdb[10]); + cmd->execute_cmd = target_do_xcopy; + break; case RECEIVE_COPY_RESULTS: + *size = get_unaligned_be32(&cdb[10]); + cmd->execute_cmd = target_do_receive_copy_results; + break; + case READ_ATTRIBUTE: case WRITE_ATTRIBUTE: *size = (cdb[10] << 24) | (cdb[11] << 16) | (cdb[12] << 8) | cdb[13]; diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index d154ce79718..9c642e02cba 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -4,7 +4,7 @@ * Modern ConfigFS group context specific statistics based on original * target_core_mib.c code * - * (c) Copyright 2006-2012 RisingTide Systems LLC. + * (c) Copyright 2006-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@linux-iscsi.org> * diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 0d7cacb9110..250009909d4 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -3,7 +3,7 @@ * * This file contains SPC-3 task management infrastructure * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index aac9d2727e3..b9a6ec0aa5f 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -3,7 +3,7 @@ * * This file contains generic Target Portal Group related functions. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d8e49d79f8c..84747cc1aac 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3,7 +3,7 @@ * * This file contains the Generic Target Engine Core. * - * (c) Copyright 2002-2012 RisingTide Systems LLC. + * (c) Copyright 2002-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * @@ -67,7 +67,6 @@ struct kmem_cache *t10_alua_tg_pt_gp_mem_cache; static void transport_complete_task_attr(struct se_cmd *cmd); static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev); -static int transport_generic_get_mem(struct se_cmd *cmd); static int transport_put_cmd(struct se_cmd *cmd); static void target_complete_ok_work(struct work_struct *work); @@ -232,6 +231,50 @@ struct se_session *transport_init_session(void) } EXPORT_SYMBOL(transport_init_session); +int transport_alloc_session_tags(struct se_session *se_sess, + unsigned int tag_num, unsigned int tag_size) +{ + int rc; + + se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, GFP_KERNEL); + if (!se_sess->sess_cmd_map) { + pr_err("Unable to allocate se_sess->sess_cmd_map\n"); + return -ENOMEM; + } + + rc = percpu_ida_init(&se_sess->sess_tag_pool, tag_num); + if (rc < 0) { + pr_err("Unable to init se_sess->sess_tag_pool," + " tag_num: %u\n", tag_num); + kfree(se_sess->sess_cmd_map); + se_sess->sess_cmd_map = NULL; + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL(transport_alloc_session_tags); + +struct se_session *transport_init_session_tags(unsigned int tag_num, + unsigned int tag_size) +{ + struct se_session *se_sess; + int rc; + + se_sess = transport_init_session(); + if (IS_ERR(se_sess)) + return se_sess; + + rc = transport_alloc_session_tags(se_sess, tag_num, tag_size); + if (rc < 0) { + transport_free_session(se_sess); + return ERR_PTR(-ENOMEM); + } + + return se_sess; +} +EXPORT_SYMBOL(transport_init_session_tags); + /* * Called with spin_lock_irqsave(&struct se_portal_group->session_lock called. */ @@ -367,6 +410,10 @@ EXPORT_SYMBOL(transport_deregister_session_configfs); void transport_free_session(struct se_session *se_sess) { + if (se_sess->sess_cmd_map) { + percpu_ida_destroy(&se_sess->sess_tag_pool); + kfree(se_sess->sess_cmd_map); + } kmem_cache_free(se_sess_cache, se_sess); } EXPORT_SYMBOL(transport_free_session); @@ -1206,7 +1253,7 @@ int transport_handle_cdb_direct( } EXPORT_SYMBOL(transport_handle_cdb_direct); -static sense_reason_t +sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_count, struct scatterlist *sgl_bidi, u32 sgl_bidi_count) { @@ -1512,6 +1559,13 @@ void transport_generic_request_failure(struct se_cmd *cmd, * For SAM Task Attribute emulation for failed struct se_cmd */ transport_complete_task_attr(cmd); + /* + * Handle special case for COMPARE_AND_WRITE failure, where the + * callback is expected to drop the per device ->caw_mutex. + */ + if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) && + cmd->transport_complete_callback) + cmd->transport_complete_callback(cmd); switch (sense_reason) { case TCM_NON_EXISTENT_LUN: @@ -1579,7 +1633,7 @@ queue_full: } EXPORT_SYMBOL(transport_generic_request_failure); -static void __target_execute_cmd(struct se_cmd *cmd) +void __target_execute_cmd(struct se_cmd *cmd) { sense_reason_t ret; @@ -1784,7 +1838,7 @@ static void transport_complete_qf(struct se_cmd *cmd) ret = cmd->se_tfo->queue_data_in(cmd); break; case DMA_TO_DEVICE: - if (cmd->t_bidi_data_sg) { + if (cmd->se_cmd_flags & SCF_BIDI) { ret = cmd->se_tfo->queue_data_in(cmd); if (ret < 0) break; @@ -1856,10 +1910,25 @@ static void target_complete_ok_work(struct work_struct *work) } /* * Check for a callback, used by amongst other things - * XDWRITE_READ_10 emulation. + * XDWRITE_READ_10 and COMPARE_AND_WRITE emulation. */ - if (cmd->transport_complete_callback) - cmd->transport_complete_callback(cmd); + if (cmd->transport_complete_callback) { + sense_reason_t rc; + + rc = cmd->transport_complete_callback(cmd); + if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) { + return; + } else if (rc) { + ret = transport_send_check_condition_and_sense(cmd, + rc, 0); + if (ret == -EAGAIN || ret == -ENOMEM) + goto queue_full; + + transport_lun_remove_cmd(cmd); + transport_cmd_check_stop_to_fabric(cmd); + return; + } + } switch (cmd->data_direction) { case DMA_FROM_DEVICE: @@ -1885,7 +1954,7 @@ static void target_complete_ok_work(struct work_struct *work) /* * Check if we need to send READ payload for BIDI-COMMAND */ - if (cmd->t_bidi_data_sg) { + if (cmd->se_cmd_flags & SCF_BIDI) { spin_lock(&cmd->se_lun->lun_sep_lock); if (cmd->se_lun->lun_sep) { cmd->se_lun->lun_sep->sep_stats.tx_data_octets += @@ -1930,10 +1999,29 @@ static inline void transport_free_sgl(struct scatterlist *sgl, int nents) kfree(sgl); } +static inline void transport_reset_sgl_orig(struct se_cmd *cmd) +{ + /* + * Check for saved t_data_sg that may be used for COMPARE_AND_WRITE + * emulation, and free + reset pointers if necessary.. + */ + if (!cmd->t_data_sg_orig) + return; + + kfree(cmd->t_data_sg); + cmd->t_data_sg = cmd->t_data_sg_orig; + cmd->t_data_sg_orig = NULL; + cmd->t_data_nents = cmd->t_data_nents_orig; + cmd->t_data_nents_orig = 0; +} + static inline void transport_free_pages(struct se_cmd *cmd) { - if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) + if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) { + transport_reset_sgl_orig(cmd); return; + } + transport_reset_sgl_orig(cmd); transport_free_sgl(cmd->t_data_sg, cmd->t_data_nents); cmd->t_data_sg = NULL; @@ -2029,24 +2117,22 @@ void transport_kunmap_data_sg(struct se_cmd *cmd) } EXPORT_SYMBOL(transport_kunmap_data_sg); -static int -transport_generic_get_mem(struct se_cmd *cmd) +int +target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length, + bool zero_page) { - u32 length = cmd->data_length; - unsigned int nents; + struct scatterlist *sg; struct page *page; - gfp_t zero_flag; + gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0; + unsigned int nent; int i = 0; - nents = DIV_ROUND_UP(length, PAGE_SIZE); - cmd->t_data_sg = kmalloc(sizeof(struct scatterlist) * nents, GFP_KERNEL); - if (!cmd->t_data_sg) + nent = DIV_ROUND_UP(length, PAGE_SIZE); + sg = kmalloc(sizeof(struct scatterlist) * nent, GFP_KERNEL); + if (!sg) return -ENOMEM; - cmd->t_data_nents = nents; - sg_init_table(cmd->t_data_sg, nents); - - zero_flag = cmd->se_cmd_flags & SCF_SCSI_DATA_CDB ? 0 : __GFP_ZERO; + sg_init_table(sg, nent); while (length) { u32 page_len = min_t(u32, length, PAGE_SIZE); @@ -2054,19 +2140,20 @@ transport_generic_get_mem(struct se_cmd *cmd) if (!page) goto out; - sg_set_page(&cmd->t_data_sg[i], page, page_len, 0); + sg_set_page(&sg[i], page, page_len, 0); length -= page_len; i++; } + *sgl = sg; + *nents = nent; return 0; out: while (i > 0) { i--; - __free_page(sg_page(&cmd->t_data_sg[i])); + __free_page(sg_page(&sg[i])); } - kfree(cmd->t_data_sg); - cmd->t_data_sg = NULL; + kfree(sg); return -ENOMEM; } @@ -2087,7 +2174,27 @@ transport_generic_new_cmd(struct se_cmd *cmd) */ if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) && cmd->data_length) { - ret = transport_generic_get_mem(cmd); + bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB); + + if ((cmd->se_cmd_flags & SCF_BIDI) || + (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) { + u32 bidi_length; + + if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) + bidi_length = cmd->t_task_nolb * + cmd->se_dev->dev_attrib.block_size; + else + bidi_length = cmd->data_length; + + ret = target_alloc_sgl(&cmd->t_bidi_data_sg, + &cmd->t_bidi_data_nents, + bidi_length, zero_flag); + if (ret < 0) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + + ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, + cmd->data_length, zero_flag); if (ret < 0) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } @@ -2740,6 +2847,15 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ASC_KEY_OFFSET] = asc; buffer[SPC_ASCQ_KEY_OFFSET] = ascq; break; + case TCM_MISCOMPARE_VERIFY: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + buffer[SPC_SENSE_KEY_OFFSET] = MISCOMPARE; + /* MISCOMPARE DURING VERIFY OPERATION */ + buffer[SPC_ASC_KEY_OFFSET] = 0x1d; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; + break; case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE: default: /* CURRENT ERROR */ diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c index bf0e390ce2d..b04467e7547 100644 --- a/drivers/target/target_core_ua.c +++ b/drivers/target/target_core_ua.c @@ -3,7 +3,7 @@ * * This file contains logic for SPC-3 Unit Attention emulation * - * (c) Copyright 2009-2012 RisingTide Systems LLC. + * (c) Copyright 2009-2013 Datera, Inc. * * Nicholas A. Bellinger <nab@kernel.org> * diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c new file mode 100644 index 00000000000..4d22e7d2adc --- /dev/null +++ b/drivers/target/target_core_xcopy.c @@ -0,0 +1,1081 @@ +/******************************************************************************* + * Filename: target_core_xcopy.c + * + * This file contains support for SPC-4 Extended-Copy offload with generic + * TCM backends. + * + * Copyright (c) 2011-2013 Datera, Inc. All rights reserved. + * + * Author: + * Nicholas A. Bellinger <nab@daterainc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + ******************************************************************************/ + +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/configfs.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <asm/unaligned.h> + +#include <target/target_core_base.h> +#include <target/target_core_backend.h> +#include <target/target_core_fabric.h> +#include <target/target_core_configfs.h> + +#include "target_core_pr.h" +#include "target_core_ua.h" +#include "target_core_xcopy.h" + +static struct workqueue_struct *xcopy_wq = NULL; +/* + * From target_core_spc.c + */ +extern void spc_parse_naa_6h_vendor_specific(struct se_device *, unsigned char *); +/* + * From target_core_device.c + */ +extern struct mutex g_device_mutex; +extern struct list_head g_device_list; +/* + * From target_core_configfs.c + */ +extern struct configfs_subsystem *target_core_subsystem[]; + +static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) +{ + int off = 0; + + buf[off++] = (0x6 << 4); + buf[off++] = 0x01; + buf[off++] = 0x40; + buf[off] = (0x5 << 4); + + spc_parse_naa_6h_vendor_specific(dev, &buf[off]); + return 0; +} + +static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, + bool src) +{ + struct se_device *se_dev; + struct configfs_subsystem *subsys = target_core_subsystem[0]; + unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; + int rc; + + if (src == true) + dev_wwn = &xop->dst_tid_wwn[0]; + else + dev_wwn = &xop->src_tid_wwn[0]; + + mutex_lock(&g_device_mutex); + list_for_each_entry(se_dev, &g_device_list, g_dev_node) { + + memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); + target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); + + rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); + if (rc != 0) + continue; + + if (src == true) { + xop->dst_dev = se_dev; + pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located" + " se_dev\n", xop->dst_dev); + } else { + xop->src_dev = se_dev; + pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from located" + " se_dev\n", xop->src_dev); + } + + rc = configfs_depend_item(subsys, + &se_dev->dev_group.cg_item); + if (rc != 0) { + pr_err("configfs_depend_item attempt failed:" + " %d for se_dev: %p\n", rc, se_dev); + mutex_unlock(&g_device_mutex); + return rc; + } + + pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p" + " se_dev->se_dev_group: %p\n", subsys, se_dev, + &se_dev->dev_group); + + mutex_unlock(&g_device_mutex); + return 0; + } + mutex_unlock(&g_device_mutex); + + pr_err("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; +} + +static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, + unsigned char *p, bool src) +{ + unsigned char *desc = p; + unsigned short ript; + u8 desig_len; + /* + * Extract RELATIVE INITIATOR PORT IDENTIFIER + */ + ript = get_unaligned_be16(&desc[2]); + pr_debug("XCOPY 0xe4: RELATIVE INITIATOR PORT IDENTIFIER: %hu\n", ript); + /* + * Check for supported code set, association, and designator type + */ + if ((desc[4] & 0x0f) != 0x1) { + pr_err("XCOPY 0xe4: code set of non binary type not supported\n"); + return -EINVAL; + } + if ((desc[5] & 0x30) != 0x00) { + pr_err("XCOPY 0xe4: association other than LUN not supported\n"); + return -EINVAL; + } + if ((desc[5] & 0x0f) != 0x3) { + pr_err("XCOPY 0xe4: designator type unsupported: 0x%02x\n", + (desc[5] & 0x0f)); + return -EINVAL; + } + /* + * Check for matching 16 byte length for NAA IEEE Registered Extended + * Assigned designator + */ + desig_len = desc[7]; + if (desig_len != 16) { + pr_err("XCOPY 0xe4: invalid desig_len: %d\n", (int)desig_len); + return -EINVAL; + } + pr_debug("XCOPY 0xe4: desig_len: %d\n", (int)desig_len); + /* + * Check for NAA IEEE Registered Extended Assigned header.. + */ + if ((desc[8] & 0xf0) != 0x60) { + pr_err("XCOPY 0xe4: Unsupported DESIGNATOR TYPE: 0x%02x\n", + (desc[8] & 0xf0)); + return -EINVAL; + } + + if (src == true) { + memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); + /* + * Determine if the source designator matches the local device + */ + if (!memcmp(&xop->local_dev_wwn[0], &xop->src_tid_wwn[0], + XCOPY_NAA_IEEE_REGEX_LEN)) { + xop->op_origin = XCOL_SOURCE_RECV_OP; + xop->src_dev = se_cmd->se_dev; + pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source" + " received xop\n", xop->src_dev); + } + } else { + memcpy(&xop->dst_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); + /* + * Determine if the destination designator matches the local device + */ + if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0], + XCOPY_NAA_IEEE_REGEX_LEN)) { + xop->op_origin = XCOL_DEST_RECV_OP; + xop->dst_dev = se_cmd->se_dev; + pr_debug("XCOPY 0xe4: Set xop->dst_dev: %p from destination" + " received xop\n", xop->dst_dev); + } + } + + return 0; +} + +static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, + struct xcopy_op *xop, unsigned char *p, + unsigned short tdll) +{ + struct se_device *local_dev = se_cmd->se_dev; + unsigned char *desc = p; + int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0; + unsigned short start = 0; + bool src = true; + + if (offset != 0) { + pr_err("XCOPY target descriptor list length is not" + " multiple of %d\n", XCOPY_TARGET_DESC_LEN); + return -EINVAL; + } + if (tdll > 64) { + pr_err("XCOPY target descriptor supports a maximum" + " two src/dest descriptors, tdll: %hu too large..\n", tdll); + return -EINVAL; + } + /* + * Generate an IEEE Registered Extended designator based upon the + * se_device the XCOPY was received upon.. + */ + memset(&xop->local_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); + target_xcopy_gen_naa_ieee(local_dev, &xop->local_dev_wwn[0]); + + while (start < tdll) { + /* + * Check target descriptor identification with 0xE4 type with + * use VPD 0x83 WWPN matching .. + */ + switch (desc[0]) { + case 0xe4: + rc = target_xcopy_parse_tiddesc_e4(se_cmd, xop, + &desc[0], src); + if (rc != 0) + goto out; + /* + * Assume target descriptors are in source -> destination order.. + */ + if (src == true) + src = false; + else + src = true; + start += XCOPY_TARGET_DESC_LEN; + desc += XCOPY_TARGET_DESC_LEN; + ret++; + break; + default: + pr_err("XCOPY unsupported descriptor type code:" + " 0x%02x\n", desc[0]); + goto out; + } + } + + if (xop->op_origin == XCOL_SOURCE_RECV_OP) + rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true); + else + rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false); + + if (rc < 0) + goto out; + + pr_debug("XCOPY TGT desc: Source dev: %p NAA IEEE WWN: 0x%16phN\n", + xop->src_dev, &xop->src_tid_wwn[0]); + pr_debug("XCOPY TGT desc: Dest dev: %p NAA IEEE WWN: 0x%16phN\n", + xop->dst_dev, &xop->dst_tid_wwn[0]); + + return ret; + +out: + return -EINVAL; +} + +static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op *xop, + unsigned char *p) +{ + unsigned char *desc = p; + int dc = (desc[1] & 0x02); + unsigned short desc_len; + + desc_len = get_unaligned_be16(&desc[2]); + if (desc_len != 0x18) { + pr_err("XCOPY segment desc 0x02: Illegal desc_len:" + " %hu\n", desc_len); + return -EINVAL; + } + + xop->stdi = get_unaligned_be16(&desc[4]); + xop->dtdi = get_unaligned_be16(&desc[6]); + pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: %d\n", + desc_len, xop->stdi, xop->dtdi, dc); + + xop->nolb = get_unaligned_be16(&desc[10]); + xop->src_lba = get_unaligned_be64(&desc[12]); + xop->dst_lba = get_unaligned_be64(&desc[20]); + pr_debug("XCOPY seg desc 0x02: nolb: %hu src_lba: %llu dst_lba: %llu\n", + xop->nolb, (unsigned long long)xop->src_lba, + (unsigned long long)xop->dst_lba); + + if (dc != 0) { + xop->dbl = (desc[29] << 16) & 0xff; + xop->dbl |= (desc[30] << 8) & 0xff; + xop->dbl |= desc[31] & 0xff; + + pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl); + } + return 0; +} + +static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd, + struct xcopy_op *xop, unsigned char *p, + unsigned int sdll) +{ + unsigned char *desc = p; + unsigned int start = 0; + int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0; + + if (offset != 0) { + pr_err("XCOPY segment descriptor list length is not" + " multiple of %d\n", XCOPY_SEGMENT_DESC_LEN); + return -EINVAL; + } + + while (start < sdll) { + /* + * Check segment descriptor type code for block -> block + */ + switch (desc[0]) { + case 0x02: + rc = target_xcopy_parse_segdesc_02(se_cmd, xop, desc); + if (rc < 0) + goto out; + + ret++; + start += XCOPY_SEGMENT_DESC_LEN; + desc += XCOPY_SEGMENT_DESC_LEN; + break; + default: + pr_err("XCOPY unspported segment descriptor" + "type: 0x%02x\n", desc[0]); + goto out; + } + } + + return ret; + +out: + return -EINVAL; +} + +/* + * Start xcopy_pt ops + */ + +struct xcopy_pt_cmd { + bool remote_port; + struct se_cmd se_cmd; + struct xcopy_op *xcopy_op; + struct completion xpt_passthrough_sem; +}; + +static struct se_port xcopy_pt_port; +static struct se_portal_group xcopy_pt_tpg; +static struct se_session xcopy_pt_sess; +static struct se_node_acl xcopy_pt_nacl; + +static char *xcopy_pt_get_fabric_name(void) +{ + return "xcopy-pt"; +} + +static u32 xcopy_pt_get_tag(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) +{ + return 0; +} + +static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) +{ + struct configfs_subsystem *subsys = target_core_subsystem[0]; + struct se_device *remote_dev; + + if (xop->op_origin == XCOL_SOURCE_RECV_OP) + remote_dev = xop->dst_dev; + else + remote_dev = xop->src_dev; + + pr_debug("Calling configfs_undepend_item for subsys: %p" + " remote_dev: %p remote_dev->dev_group: %p\n", + subsys, remote_dev, &remote_dev->dev_group.cg_item); + + configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item); +} + +static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) +{ + struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, + struct xcopy_pt_cmd, se_cmd); + + if (xpt_cmd->remote_port) + kfree(se_cmd->se_lun); + + kfree(xpt_cmd); +} + +static int xcopy_pt_check_stop_free(struct se_cmd *se_cmd) +{ + struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd, + struct xcopy_pt_cmd, se_cmd); + + complete(&xpt_cmd->xpt_passthrough_sem); + return 0; +} + +static int xcopy_pt_write_pending(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_write_pending_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_queue_data_in(struct se_cmd *se_cmd) +{ + return 0; +} + +static int xcopy_pt_queue_status(struct se_cmd *se_cmd) +{ + return 0; +} + +static struct target_core_fabric_ops xcopy_pt_tfo = { + .get_fabric_name = xcopy_pt_get_fabric_name, + .get_task_tag = xcopy_pt_get_tag, + .get_cmd_state = xcopy_pt_get_cmd_state, + .release_cmd = xcopy_pt_release_cmd, + .check_stop_free = xcopy_pt_check_stop_free, + .write_pending = xcopy_pt_write_pending, + .write_pending_status = xcopy_pt_write_pending_status, + .queue_data_in = xcopy_pt_queue_data_in, + .queue_status = xcopy_pt_queue_status, +}; + +/* + * End xcopy_pt_ops + */ + +int target_xcopy_setup_pt(void) +{ + xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0); + if (!xcopy_wq) { + pr_err("Unable to allocate xcopy_wq\n"); + return -ENOMEM; + } + + memset(&xcopy_pt_port, 0, sizeof(struct se_port)); + INIT_LIST_HEAD(&xcopy_pt_port.sep_alua_list); + INIT_LIST_HEAD(&xcopy_pt_port.sep_list); + mutex_init(&xcopy_pt_port.sep_tg_pt_md_mutex); + + memset(&xcopy_pt_tpg, 0, sizeof(struct se_portal_group)); + INIT_LIST_HEAD(&xcopy_pt_tpg.se_tpg_node); + INIT_LIST_HEAD(&xcopy_pt_tpg.acl_node_list); + INIT_LIST_HEAD(&xcopy_pt_tpg.tpg_sess_list); + + xcopy_pt_port.sep_tpg = &xcopy_pt_tpg; + xcopy_pt_tpg.se_tpg_tfo = &xcopy_pt_tfo; + + memset(&xcopy_pt_nacl, 0, sizeof(struct se_node_acl)); + INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list); + INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list); + memset(&xcopy_pt_sess, 0, sizeof(struct se_session)); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_list); + INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list); + + xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg; + xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess; + + xcopy_pt_sess.se_tpg = &xcopy_pt_tpg; + xcopy_pt_sess.se_node_acl = &xcopy_pt_nacl; + + return 0; +} + +void target_xcopy_release_pt(void) +{ + if (xcopy_wq) + destroy_workqueue(xcopy_wq); +} + +static void target_xcopy_setup_pt_port( + struct xcopy_pt_cmd *xpt_cmd, + struct xcopy_op *xop, + bool remote_port) +{ + struct se_cmd *ec_cmd = xop->xop_se_cmd; + struct se_cmd *pt_cmd = &xpt_cmd->se_cmd; + + if (xop->op_origin == XCOL_SOURCE_RECV_OP) { + /* + * Honor destination port reservations for X-COPY PUSH emulation + * when CDB is received on local source port, and READs blocks to + * WRITE on remote destination port. + */ + if (remote_port) { + xpt_cmd->remote_port = remote_port; + pt_cmd->se_lun->lun_sep = &xcopy_pt_port; + pr_debug("Setup emulated remote DEST xcopy_pt_port: %p to" + " cmd->se_lun->lun_sep for X-COPY data PUSH\n", + pt_cmd->se_lun->lun_sep); + } else { + pt_cmd->se_lun = ec_cmd->se_lun; + pt_cmd->se_dev = ec_cmd->se_dev; + + pr_debug("Honoring local SRC port from ec_cmd->se_dev:" + " %p\n", pt_cmd->se_dev); + pt_cmd->se_lun = ec_cmd->se_lun; + pr_debug("Honoring local SRC port from ec_cmd->se_lun: %p\n", + pt_cmd->se_lun); + } + } else { + /* + * Honor source port reservation for X-COPY PULL emulation + * when CDB is received on local desintation port, and READs + * blocks from the remote source port to WRITE on local + * destination port. + */ + if (remote_port) { + xpt_cmd->remote_port = remote_port; + pt_cmd->se_lun->lun_sep = &xcopy_pt_port; + pr_debug("Setup emulated remote SRC xcopy_pt_port: %p to" + " cmd->se_lun->lun_sep for X-COPY data PULL\n", + pt_cmd->se_lun->lun_sep); + } else { + pt_cmd->se_lun = ec_cmd->se_lun; + pt_cmd->se_dev = ec_cmd->se_dev; + + pr_debug("Honoring local DST port from ec_cmd->se_dev:" + " %p\n", pt_cmd->se_dev); + pt_cmd->se_lun = ec_cmd->se_lun; + pr_debug("Honoring local DST port from ec_cmd->se_lun: %p\n", + pt_cmd->se_lun); + } + } +} + +static int target_xcopy_init_pt_lun( + struct xcopy_pt_cmd *xpt_cmd, + struct xcopy_op *xop, + struct se_device *se_dev, + struct se_cmd *pt_cmd, + bool remote_port) +{ + /* + * Don't allocate + init an pt_cmd->se_lun if honoring local port for + * reservations. The pt_cmd->se_lun pointer will be setup from within + * target_xcopy_setup_pt_port() + */ + if (remote_port == false) { + pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH; + return 0; + } + + pt_cmd->se_lun = kzalloc(sizeof(struct se_lun), GFP_KERNEL); + if (!pt_cmd->se_lun) { + pr_err("Unable to allocate pt_cmd->se_lun\n"); + return -ENOMEM; + } + init_completion(&pt_cmd->se_lun->lun_shutdown_comp); + INIT_LIST_HEAD(&pt_cmd->se_lun->lun_cmd_list); + INIT_LIST_HEAD(&pt_cmd->se_lun->lun_acl_list); + spin_lock_init(&pt_cmd->se_lun->lun_acl_lock); + spin_lock_init(&pt_cmd->se_lun->lun_cmd_lock); + spin_lock_init(&pt_cmd->se_lun->lun_sep_lock); + + pt_cmd->se_dev = se_dev; + + pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev); + pt_cmd->se_lun->lun_se_dev = se_dev; + pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH; + + pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n", + pt_cmd->se_lun->lun_se_dev); + + return 0; +} + +static int target_xcopy_setup_pt_cmd( + struct xcopy_pt_cmd *xpt_cmd, + struct xcopy_op *xop, + struct se_device *se_dev, + unsigned char *cdb, + bool remote_port, + bool alloc_mem) +{ + struct se_cmd *cmd = &xpt_cmd->se_cmd; + sense_reason_t sense_rc; + int ret = 0, rc; + /* + * Setup LUN+port to honor reservations based upon xop->op_origin for + * X-COPY PUSH or X-COPY PULL based upon where the CDB was received. + */ + rc = target_xcopy_init_pt_lun(xpt_cmd, xop, se_dev, cmd, remote_port); + if (rc < 0) { + ret = rc; + goto out; + } + xpt_cmd->xcopy_op = xop; + target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port); + + sense_rc = target_setup_cmd_from_cdb(cmd, cdb); + if (sense_rc) { + ret = -EINVAL; + goto out; + } + + if (alloc_mem) { + rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents, + cmd->data_length, false); + if (rc < 0) { + ret = rc; + goto out; + } + /* + * Set this bit so that transport_free_pages() allows the + * caller to release SGLs + physical memory allocated by + * transport_generic_get_mem().. + */ + cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + } else { + /* + * Here the previously allocated SGLs for the internal READ + * are mapped zero-copy to the internal WRITE. + */ + sense_rc = transport_generic_map_mem_to_cmd(cmd, + xop->xop_data_sg, xop->xop_data_nents, + NULL, 0); + if (sense_rc) { + ret = -EINVAL; + goto out; + } + + pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:" + " %u\n", cmd->t_data_sg, cmd->t_data_nents); + } + + return 0; + +out: + if (remote_port == true) + kfree(cmd->se_lun); + return ret; +} + +static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd) +{ + struct se_cmd *se_cmd = &xpt_cmd->se_cmd; + sense_reason_t sense_rc; + + sense_rc = transport_generic_new_cmd(se_cmd); + if (sense_rc) + return -EINVAL; + + if (se_cmd->data_direction == DMA_TO_DEVICE) + target_execute_cmd(se_cmd); + + wait_for_completion_interruptible(&xpt_cmd->xpt_passthrough_sem); + + pr_debug("target_xcopy_issue_pt_cmd(): SCSI status: 0x%02x\n", + se_cmd->scsi_status); + return 0; +} + +static int target_xcopy_read_source( + struct se_cmd *ec_cmd, + struct xcopy_op *xop, + struct se_device *src_dev, + sector_t src_lba, + u32 src_sectors) +{ + struct xcopy_pt_cmd *xpt_cmd; + struct se_cmd *se_cmd; + u32 length = (src_sectors * src_dev->dev_attrib.block_size); + int rc; + unsigned char cdb[16]; + bool remote_port = (xop->op_origin == XCOL_DEST_RECV_OP); + + xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL); + if (!xpt_cmd) { + pr_err("Unable to allocate xcopy_pt_cmd\n"); + return -ENOMEM; + } + init_completion(&xpt_cmd->xpt_passthrough_sem); + se_cmd = &xpt_cmd->se_cmd; + + memset(&cdb[0], 0, 16); + cdb[0] = READ_16; + put_unaligned_be64(src_lba, &cdb[2]); + put_unaligned_be32(src_sectors, &cdb[10]); + pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n", + (unsigned long long)src_lba, src_sectors, length); + + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + DMA_FROM_DEVICE, 0, NULL); + xop->src_pt_cmd = xpt_cmd; + + rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], + remote_port, true); + if (rc < 0) { + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + + xop->xop_data_sg = se_cmd->t_data_sg; + xop->xop_data_nents = se_cmd->t_data_nents; + pr_debug("XCOPY-READ: Saved xop->xop_data_sg: %p, num: %u for READ" + " memory\n", xop->xop_data_sg, xop->xop_data_nents); + + rc = target_xcopy_issue_pt_cmd(xpt_cmd); + if (rc < 0) { + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + /* + * Clear off the allocated t_data_sg, that has been saved for + * zero-copy WRITE submission reuse in struct xcopy_op.. + */ + se_cmd->t_data_sg = NULL; + se_cmd->t_data_nents = 0; + + return 0; +} + +static int target_xcopy_write_destination( + struct se_cmd *ec_cmd, + struct xcopy_op *xop, + struct se_device *dst_dev, + sector_t dst_lba, + u32 dst_sectors) +{ + struct xcopy_pt_cmd *xpt_cmd; + struct se_cmd *se_cmd; + u32 length = (dst_sectors * dst_dev->dev_attrib.block_size); + int rc; + unsigned char cdb[16]; + bool remote_port = (xop->op_origin == XCOL_SOURCE_RECV_OP); + + xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL); + if (!xpt_cmd) { + pr_err("Unable to allocate xcopy_pt_cmd\n"); + return -ENOMEM; + } + init_completion(&xpt_cmd->xpt_passthrough_sem); + se_cmd = &xpt_cmd->se_cmd; + + memset(&cdb[0], 0, 16); + cdb[0] = WRITE_16; + put_unaligned_be64(dst_lba, &cdb[2]); + put_unaligned_be32(dst_sectors, &cdb[10]); + pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n", + (unsigned long long)dst_lba, dst_sectors, length); + + transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, + DMA_TO_DEVICE, 0, NULL); + xop->dst_pt_cmd = xpt_cmd; + + rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0], + remote_port, false); + if (rc < 0) { + struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd; + /* + * If the failure happened before the t_mem_list hand-off in + * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so that + * core releases this memory on error during X-COPY WRITE I/O. + */ + src_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + src_cmd->t_data_sg = xop->xop_data_sg; + src_cmd->t_data_nents = xop->xop_data_nents; + + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + + rc = target_xcopy_issue_pt_cmd(xpt_cmd); + if (rc < 0) { + se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + transport_generic_free_cmd(se_cmd, 0); + return rc; + } + + return 0; +} + +static void target_xcopy_do_work(struct work_struct *work) +{ + struct xcopy_op *xop = container_of(work, struct xcopy_op, xop_work); + struct se_device *src_dev = xop->src_dev, *dst_dev = xop->dst_dev; + struct se_cmd *ec_cmd = xop->xop_se_cmd; + sector_t src_lba = xop->src_lba, dst_lba = xop->dst_lba, end_lba; + unsigned int max_sectors; + int rc; + unsigned short nolb = xop->nolb, cur_nolb, max_nolb, copied_nolb = 0; + + end_lba = src_lba + nolb; + /* + * Break up XCOPY I/O into hw_max_sectors sized I/O based on the + * smallest max_sectors between src_dev + dev_dev, or + */ + max_sectors = min(src_dev->dev_attrib.hw_max_sectors, + dst_dev->dev_attrib.hw_max_sectors); + max_sectors = min_t(u32, max_sectors, XCOPY_MAX_SECTORS); + + max_nolb = min_t(u16, max_sectors, ((u16)(~0U))); + + pr_debug("target_xcopy_do_work: nolb: %hu, max_nolb: %hu end_lba: %llu\n", + nolb, max_nolb, (unsigned long long)end_lba); + pr_debug("target_xcopy_do_work: Starting src_lba: %llu, dst_lba: %llu\n", + (unsigned long long)src_lba, (unsigned long long)dst_lba); + + while (src_lba < end_lba) { + cur_nolb = min(nolb, max_nolb); + + pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu," + " cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb); + + rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_nolb); + if (rc < 0) + goto out; + + src_lba += cur_nolb; + pr_debug("target_xcopy_do_work: Incremented READ src_lba to %llu\n", + (unsigned long long)src_lba); + + pr_debug("target_xcopy_do_work: Calling write dst_dev: %p dst_lba: %llu," + " cur_nolb: %hu\n", dst_dev, (unsigned long long)dst_lba, cur_nolb); + + rc = target_xcopy_write_destination(ec_cmd, xop, dst_dev, + dst_lba, cur_nolb); + if (rc < 0) { + transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); + goto out; + } + + dst_lba += cur_nolb; + pr_debug("target_xcopy_do_work: Incremented WRITE dst_lba to %llu\n", + (unsigned long long)dst_lba); + + copied_nolb += cur_nolb; + nolb -= cur_nolb; + + transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0); + xop->dst_pt_cmd->se_cmd.se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; + + transport_generic_free_cmd(&xop->dst_pt_cmd->se_cmd, 0); + } + + xcopy_pt_undepend_remotedev(xop); + kfree(xop); + + pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n", + (unsigned long long)src_lba, (unsigned long long)dst_lba); + pr_debug("target_xcopy_do_work: Blocks copied: %hu, Bytes Copied: %u\n", + copied_nolb, copied_nolb * dst_dev->dev_attrib.block_size); + + pr_debug("target_xcopy_do_work: Setting X-COPY GOOD status -> sending response\n"); + target_complete_cmd(ec_cmd, SAM_STAT_GOOD); + return; + +out: + xcopy_pt_undepend_remotedev(xop); + kfree(xop); + + pr_warn("target_xcopy_do_work: Setting X-COPY CHECK_CONDITION -> sending response\n"); + ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION); +} + +sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) +{ + struct xcopy_op *xop = NULL; + unsigned char *p = NULL, *seg_desc; + unsigned int list_id, list_id_usage, sdll, inline_dl, sa; + int rc; + unsigned short tdll; + + sa = se_cmd->t_task_cdb[1] & 0x1f; + if (sa != 0x00) { + pr_err("EXTENDED_COPY(LID4) not supported\n"); + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + + p = transport_kmap_data_sg(se_cmd); + if (!p) { + pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n"); + return TCM_OUT_OF_RESOURCES; + } + + list_id = p[0]; + if (list_id != 0x00) { + pr_err("XCOPY with non zero list_id: 0x%02x\n", list_id); + goto out; + } + list_id_usage = (p[1] & 0x18); + /* + * Determine TARGET DESCRIPTOR LIST LENGTH + SEGMENT DESCRIPTOR LIST LENGTH + */ + tdll = get_unaligned_be16(&p[2]); + sdll = get_unaligned_be32(&p[8]); + + inline_dl = get_unaligned_be32(&p[12]); + if (inline_dl != 0) { + pr_err("XCOPY with non zero inline data length\n"); + goto out; + } + + xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); + if (!xop) { + pr_err("Unable to allocate xcopy_op\n"); + goto out; + } + xop->xop_se_cmd = se_cmd; + + pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x" + " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, + tdll, sdll, inline_dl); + + rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll); + if (rc <= 0) + goto out; + + pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc, + rc * XCOPY_TARGET_DESC_LEN); + seg_desc = &p[16]; + seg_desc += (rc * XCOPY_TARGET_DESC_LEN); + + rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, sdll); + if (rc <= 0) { + xcopy_pt_undepend_remotedev(xop); + goto out; + } + transport_kunmap_data_sg(se_cmd); + + pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc, + rc * XCOPY_SEGMENT_DESC_LEN); + INIT_WORK(&xop->xop_work, target_xcopy_do_work); + queue_work(xcopy_wq, &xop->xop_work); + return TCM_NO_SENSE; + +out: + if (p) + transport_kunmap_data_sg(se_cmd); + kfree(xop); + return TCM_INVALID_CDB_FIELD; +} + +static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd) +{ + unsigned char *p; + + p = transport_kmap_data_sg(se_cmd); + if (!p) { + pr_err("transport_kmap_data_sg failed in" + " target_rcr_operating_parameters\n"); + return TCM_OUT_OF_RESOURCES; + } + + if (se_cmd->data_length < 54) { + pr_err("Receive Copy Results Op Parameters length" + " too small: %u\n", se_cmd->data_length); + transport_kunmap_data_sg(se_cmd); + return TCM_INVALID_CDB_FIELD; + } + /* + * Set SNLID=1 (Supports no List ID) + */ + p[4] = 0x1; + /* + * MAXIMUM TARGET DESCRIPTOR COUNT + */ + put_unaligned_be16(RCR_OP_MAX_TARGET_DESC_COUNT, &p[8]); + /* + * MAXIMUM SEGMENT DESCRIPTOR COUNT + */ + put_unaligned_be16(RCR_OP_MAX_SG_DESC_COUNT, &p[10]); + /* + * MAXIMUM DESCRIPTOR LIST LENGTH + */ + put_unaligned_be32(RCR_OP_MAX_DESC_LIST_LEN, &p[12]); + /* + * MAXIMUM SEGMENT LENGTH + */ + put_unaligned_be32(RCR_OP_MAX_SEGMENT_LEN, &p[16]); + /* + * MAXIMUM INLINE DATA LENGTH for SA 0x04 (NOT SUPPORTED) + */ + put_unaligned_be32(0x0, &p[20]); + /* + * HELD DATA LIMIT + */ + put_unaligned_be32(0x0, &p[24]); + /* + * MAXIMUM STREAM DEVICE TRANSFER SIZE + */ + put_unaligned_be32(0x0, &p[28]); + /* + * TOTAL CONCURRENT COPIES + */ + put_unaligned_be16(RCR_OP_TOTAL_CONCURR_COPIES, &p[34]); + /* + * MAXIMUM CONCURRENT COPIES + */ + p[36] = RCR_OP_MAX_CONCURR_COPIES; + /* + * DATA SEGMENT GRANULARITY (log 2) + */ + p[37] = RCR_OP_DATA_SEG_GRAN_LOG2; + /* + * INLINE DATA GRANULARITY log 2) + */ + p[38] = RCR_OP_INLINE_DATA_GRAN_LOG2; + /* + * HELD DATA GRANULARITY + */ + p[39] = RCR_OP_HELD_DATA_GRAN_LOG2; + /* + * IMPLEMENTED DESCRIPTOR LIST LENGTH + */ + p[43] = 0x2; + /* + * List of implemented descriptor type codes (ordered) + */ + p[44] = 0x02; /* Copy Block to Block device */ + p[45] = 0xe4; /* Identification descriptor target descriptor */ + + /* + * AVAILABLE DATA (n-3) + */ + put_unaligned_be32(42, &p[0]); + + transport_kunmap_data_sg(se_cmd); + target_complete_cmd(se_cmd, GOOD); + + return TCM_NO_SENSE; +} + +sense_reason_t target_do_receive_copy_results(struct se_cmd *se_cmd) +{ + unsigned char *cdb = &se_cmd->t_task_cdb[0]; + int sa = (cdb[1] & 0x1f), list_id = cdb[2]; + sense_reason_t rc = TCM_NO_SENSE; + + pr_debug("Entering target_do_receive_copy_results: SA: 0x%02x, List ID:" + " 0x%02x, AL: %u\n", sa, list_id, se_cmd->data_length); + + if (list_id != 0) { + pr_err("Receive Copy Results with non zero list identifier" + " not supported\n"); + return TCM_INVALID_CDB_FIELD; + } + + switch (sa) { + case RCR_SA_OPERATING_PARAMETERS: + rc = target_rcr_operating_parameters(se_cmd); + break; + case RCR_SA_COPY_STATUS: + case RCR_SA_RECEIVE_DATA: + case RCR_SA_FAILED_SEGMENT_DETAILS: + default: + pr_err("Unsupported SA for receive copy results: 0x%02x\n", sa); + return TCM_INVALID_CDB_FIELD; + } + + return rc; +} diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h new file mode 100644 index 00000000000..700a981c7b4 --- /dev/null +++ b/drivers/target/target_core_xcopy.h @@ -0,0 +1,62 @@ +#define XCOPY_TARGET_DESC_LEN 32 +#define XCOPY_SEGMENT_DESC_LEN 28 +#define XCOPY_NAA_IEEE_REGEX_LEN 16 +#define XCOPY_MAX_SECTORS 1024 + +enum xcopy_origin_list { + XCOL_SOURCE_RECV_OP = 0x01, + XCOL_DEST_RECV_OP = 0x02, +}; + +struct xcopy_pt_cmd; + +struct xcopy_op { + int op_origin; + + struct se_cmd *xop_se_cmd; + struct se_device *src_dev; + unsigned char src_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + struct se_device *dst_dev; + unsigned char dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + + sector_t src_lba; + sector_t dst_lba; + unsigned short stdi; + unsigned short dtdi; + unsigned short nolb; + unsigned int dbl; + + struct xcopy_pt_cmd *src_pt_cmd; + struct xcopy_pt_cmd *dst_pt_cmd; + + u32 xop_data_nents; + struct scatterlist *xop_data_sg; + struct work_struct xop_work; +}; + +/* + * Receive Copy Results Sevice Actions + */ +#define RCR_SA_COPY_STATUS 0x00 +#define RCR_SA_RECEIVE_DATA 0x01 +#define RCR_SA_OPERATING_PARAMETERS 0x03 +#define RCR_SA_FAILED_SEGMENT_DETAILS 0x04 + +/* + * Receive Copy Results defs for Operating Parameters + */ +#define RCR_OP_MAX_TARGET_DESC_COUNT 0x2 +#define RCR_OP_MAX_SG_DESC_COUNT 0x1 +#define RCR_OP_MAX_DESC_LIST_LEN 1024 +#define RCR_OP_MAX_SEGMENT_LEN 268435456 /* 256 MB */ +#define RCR_OP_TOTAL_CONCURR_COPIES 0x1 /* Must be <= 16384 */ +#define RCR_OP_MAX_CONCURR_COPIES 0x1 /* Must be <= 255 */ +#define RCR_OP_DATA_SEG_GRAN_LOG2 9 /* 512 bytes in log 2 */ +#define RCR_OP_INLINE_DATA_GRAN_LOG2 9 /* 512 bytes in log 2 */ +#define RCR_OP_HELD_DATA_GRAN_LOG2 9 /* 512 bytes in log 2 */ + +extern int target_xcopy_setup_pt(void); +extern void target_xcopy_release_pt(void); +extern sense_reason_t target_do_xcopy(struct se_cmd *); +extern sense_reason_t target_do_receive_copy_results(struct se_cmd *); diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index b74feb0d513..4e0050840a7 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -311,7 +311,11 @@ static struct se_portal_group *ft_add_tpg( */ if (strstr(name, "tpgt_") != name) return NULL; - if (strict_strtoul(name + 5, 10, &index) || index > UINT_MAX) + + ret = kstrtoul(name + 5, 10, &index); + if (ret) + return NULL; + if (index > UINT_MAX) return NULL; lacl = container_of(wwn, struct ft_lport_acl, fc_lport_wwn); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0c27c7df1b0..4b79a1f2f90 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1,12 +1,12 @@ /******************************************************************************* * Vhost kernel TCM fabric driver for virtio SCSI initiators * - * (C) Copyright 2010-2012 RisingTide Systems LLC. + * (C) Copyright 2010-2013 Datera, Inc. * (C) Copyright 2010-2012 IBM Corp. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * - * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Authors: Nicholas A. Bellinger <nab@daterainc.com> * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify @@ -48,12 +48,16 @@ #include <linux/virtio_scsi.h> #include <linux/llist.h> #include <linux/bitmap.h> +#include <linux/percpu_ida.h> #include "vhost.h" #define TCM_VHOST_VERSION "v0.1" #define TCM_VHOST_NAMELEN 256 #define TCM_VHOST_MAX_CDB_SIZE 32 +#define TCM_VHOST_DEFAULT_TAGS 256 +#define TCM_VHOST_PREALLOC_SGLS 2048 +#define TCM_VHOST_PREALLOC_PAGES 2048 struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ @@ -79,6 +83,7 @@ struct tcm_vhost_cmd { u32 tvc_lun; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; + struct page **tvc_upages; /* Pointer to response */ struct virtio_scsi_cmd_resp __user *tvc_resp; /* Pointer to vhost_scsi for our device */ @@ -450,17 +455,16 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd) { struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, struct tcm_vhost_cmd, tvc_se_cmd); + struct se_session *se_sess = se_cmd->se_sess; if (tv_cmd->tvc_sgl_count) { u32 i; for (i = 0; i < tv_cmd->tvc_sgl_count; i++) put_page(sg_page(&tv_cmd->tvc_sgl[i])); - - kfree(tv_cmd->tvc_sgl); } tcm_vhost_put_inflight(tv_cmd->inflight); - kfree(tv_cmd); + percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } static int tcm_vhost_shutdown_session(struct se_session *se_sess) @@ -704,7 +708,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct tcm_vhost_cmd * -vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, +vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tpg, struct virtio_scsi_cmd_req *v_req, u32 exp_data_len, @@ -712,18 +716,27 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, { struct tcm_vhost_cmd *cmd; struct tcm_vhost_nexus *tv_nexus; + struct se_session *se_sess; + struct scatterlist *sg; + struct page **pages; + int tag; tv_nexus = tpg->tpg_nexus; if (!tv_nexus) { pr_err("Unable to locate active struct tcm_vhost_nexus\n"); return ERR_PTR(-EIO); } + se_sess = tv_nexus->tvn_se_sess; - cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC); - if (!cmd) { - pr_err("Unable to allocate struct tcm_vhost_cmd\n"); - return ERR_PTR(-ENOMEM); - } + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL); + cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag]; + sg = cmd->tvc_sgl; + pages = cmd->tvc_upages; + memset(cmd, 0, sizeof(struct tcm_vhost_cmd)); + + cmd->tvc_sgl = sg; + cmd->tvc_upages = pages; + cmd->tvc_se_cmd.map_tag = tag; cmd->tvc_tag = v_req->tag; cmd->tvc_task_attr = v_req->task_attr; cmd->tvc_exp_data_len = exp_data_len; @@ -740,7 +753,8 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq, * Returns the number of scatterlist entries used or -errno on error. */ static int -vhost_scsi_map_to_sgl(struct scatterlist *sgl, +vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd, + struct scatterlist *sgl, unsigned int sgl_count, struct iovec *iov, int write) @@ -752,13 +766,25 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, struct page **pages; int ret, i; + if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { + pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than" + " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", + sgl_count, TCM_VHOST_PREALLOC_SGLS); + return -ENOBUFS; + } + pages_nr = iov_num_pages(iov); if (pages_nr > sgl_count) return -ENOBUFS; - pages = kmalloc(pages_nr * sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (pages_nr > TCM_VHOST_PREALLOC_PAGES) { + pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" + " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n", + pages_nr, TCM_VHOST_PREALLOC_PAGES); + return -ENOBUFS; + } + + pages = tv_cmd->tvc_upages; ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); /* No pages were pinned */ @@ -783,7 +809,6 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl, } out: - kfree(pages); return ret; } @@ -807,24 +832,20 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, /* TODO overflow checking */ - sg = kmalloc(sizeof(cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC); - if (!sg) - return -ENOMEM; - pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__, - sg, sgl_count, !sg); + sg = cmd->tvc_sgl; + pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count); sg_init_table(sg, sgl_count); - cmd->tvc_sgl = sg; cmd->tvc_sgl_count = sgl_count; pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count); for (i = 0; i < niov; i++) { - ret = vhost_scsi_map_to_sgl(sg, sgl_count, &iov[i], write); + ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i], + write); if (ret < 0) { for (i = 0; i < cmd->tvc_sgl_count; i++) put_page(sg_page(&cmd->tvc_sgl[i])); - kfree(cmd->tvc_sgl); - cmd->tvc_sgl = NULL; + cmd->tvc_sgl_count = 0; return ret; } @@ -989,10 +1010,10 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) for (i = 0; i < data_num; i++) exp_data_len += vq->iov[data_first + i].iov_len; - cmd = vhost_scsi_allocate_cmd(vq, tpg, &v_req, - exp_data_len, data_direction); + cmd = vhost_scsi_get_tag(vq, tpg, &v_req, + exp_data_len, data_direction); if (IS_ERR(cmd)) { - vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", + vq_err(vq, "vhost_scsi_get_tag failed %ld\n", PTR_ERR(cmd)); goto err_cmd; } @@ -1654,11 +1675,31 @@ static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl) kfree(nacl); } +static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus, + struct se_session *se_sess) +{ + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; + + if (!se_sess->sess_cmd_map) + return; + + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + kfree(tv_cmd->tvc_sgl); + kfree(tv_cmd->tvc_upages); + } +} + static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, const char *name) { struct se_portal_group *se_tpg; + struct se_session *se_sess; struct tcm_vhost_nexus *tv_nexus; + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; mutex_lock(&tpg->tv_tpg_mutex); if (tpg->tpg_nexus) { @@ -1675,14 +1716,37 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, return -ENOMEM; } /* - * Initialize the struct se_session pointer + * Initialize the struct se_session pointer and setup tagpool + * for struct tcm_vhost_cmd descriptors */ - tv_nexus->tvn_se_sess = transport_init_session(); + tv_nexus->tvn_se_sess = transport_init_session_tags( + TCM_VHOST_DEFAULT_TAGS, + sizeof(struct tcm_vhost_cmd)); if (IS_ERR(tv_nexus->tvn_se_sess)) { mutex_unlock(&tpg->tv_tpg_mutex); kfree(tv_nexus); return -ENOMEM; } + se_sess = tv_nexus->tvn_se_sess; + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) * + TCM_VHOST_PREALLOC_SGLS, GFP_KERNEL); + if (!tv_cmd->tvc_sgl) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); + goto out; + } + + tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) * + TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL); + if (!tv_cmd->tvc_upages) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_upages\n"); + goto out; + } + } /* * Since we are running in 'demo mode' this call with generate a * struct se_node_acl for the tcm_vhost struct se_portal_group with @@ -1694,9 +1758,7 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); pr_debug("core_tpg_check_initiator_node_acl() failed" " for %s\n", name); - transport_free_session(tv_nexus->tvn_se_sess); - kfree(tv_nexus); - return -ENOMEM; + goto out; } /* * Now register the TCM vhost virtual I_T Nexus as active with the @@ -1708,6 +1770,12 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, mutex_unlock(&tpg->tv_tpg_mutex); return 0; + +out: + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); + transport_free_session(se_sess); + kfree(tv_nexus); + return -ENOMEM; } static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) @@ -1747,6 +1815,8 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated" " %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); + + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); /* * Release the SCSI I_T Nexus to the emulated vhost Target Port */ diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h new file mode 100644 index 00000000000..0b23edbee30 --- /dev/null +++ b/include/linux/percpu_ida.h @@ -0,0 +1,60 @@ +#ifndef __PERCPU_IDA_H__ +#define __PERCPU_IDA_H__ + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/init.h> +#include <linux/spinlock_types.h> +#include <linux/wait.h> +#include <linux/cpumask.h> + +struct percpu_ida_cpu; + +struct percpu_ida { + /* + * number of tags available to be allocated, as passed to + * percpu_ida_init() + */ + unsigned nr_tags; + + struct percpu_ida_cpu __percpu *tag_cpu; + + /* + * Bitmap of cpus that (may) have tags on their percpu freelists: + * steal_tags() uses this to decide when to steal tags, and which cpus + * to try stealing from. + * + * It's ok for a freelist to be empty when its bit is set - steal_tags() + * will just keep looking - but the bitmap _must_ be set whenever a + * percpu freelist does have tags. + */ + cpumask_t cpus_have_tags; + + struct { + spinlock_t lock; + /* + * When we go to steal tags from another cpu (see steal_tags()), + * we want to pick a cpu at random. Cycling through them every + * time we steal is a bit easier and more or less equivalent: + */ + unsigned cpu_last_stolen; + + /* For sleeping on allocation failure */ + wait_queue_head_t wait; + + /* + * Global freelist - it's a stack where nr_free points to the + * top + */ + unsigned nr_free; + unsigned *freelist; + } ____cacheline_aligned_in_smp; +}; + +int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); +void percpu_ida_free(struct percpu_ida *pool, unsigned tag); + +void percpu_ida_destroy(struct percpu_ida *pool); +int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags); + +#endif /* __PERCPU_IDA_H__ */ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index d477bfb73fb..66d42edfb3f 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -144,6 +144,7 @@ enum scsi_timeouts { #define ACCESS_CONTROL_IN 0x86 #define ACCESS_CONTROL_OUT 0x87 #define READ_16 0x88 +#define COMPARE_AND_WRITE 0x89 #define WRITE_16 0x8a #define READ_ATTRIBUTE 0x8c #define WRITE_ATTRIBUTE 0x8d diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index e5d09d242ba..a12589c4ee9 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -6,13 +6,13 @@ struct iscsit_transport { #define ISCSIT_TRANSPORT_NAME 16 char name[ISCSIT_TRANSPORT_NAME]; int transport_type; + int priv_size; struct module *owner; struct list_head t_node; int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); void (*iscsit_free_conn)(struct iscsi_conn *); - struct iscsi_cmd *(*iscsit_alloc_cmd)(struct iscsi_conn *, gfp_t); int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *); int (*iscsit_put_login_tx)(struct iscsi_conn *, struct iscsi_login *, u32); int (*iscsit_immediate_queue)(struct iscsi_conn *, struct iscsi_cmd *, int); @@ -22,6 +22,11 @@ struct iscsit_transport { int (*iscsit_queue_status)(struct iscsi_conn *, struct iscsi_cmd *); }; +static inline void *iscsit_priv_cmd(struct iscsi_cmd *cmd) +{ + return (void *)(cmd + 1); +} + /* * From iscsi_target_transport.c */ @@ -92,3 +97,4 @@ extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *); extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *, unsigned char *, __be32); +extern void iscsit_release_cmd(struct iscsi_cmd *); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index ffa2696d64d..5ebe21cd5d1 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -39,7 +39,8 @@ struct se_subsystem_api { }; struct sbc_ops { - sense_reason_t (*execute_rw)(struct se_cmd *cmd); + sense_reason_t (*execute_rw)(struct se_cmd *cmd, struct scatterlist *, + u32, enum dma_data_direction); sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); sense_reason_t (*execute_write_same)(struct se_cmd *cmd); sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd); @@ -73,6 +74,10 @@ int transport_set_vpd_ident(struct t10_vpd *, unsigned char *); /* core helpers also used by command snooping in pscsi */ void *transport_kmap_data_sg(struct se_cmd *); void transport_kunmap_data_sg(struct se_cmd *); +/* core helpers also used by xcopy during internal command setup */ +int target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool); +sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *, + struct scatterlist *, u32, struct scatterlist *, u32); void array_free(void *array, int n); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e34fc904f2e..5bdb8b7d2a6 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -5,11 +5,12 @@ #include <linux/configfs.h> #include <linux/dma-mapping.h> #include <linux/blkdev.h> +#include <linux/percpu_ida.h> #include <scsi/scsi_cmnd.h> #include <net/sock.h> #include <net/tcp.h> -#define TARGET_CORE_MOD_VERSION "v4.1.0-rc2-ml" +#define TARGET_CORE_MOD_VERSION "v4.1.0" #define TARGET_CORE_VERSION TARGET_CORE_MOD_VERSION /* Maximum Number of LUNs per Target Portal Group */ @@ -96,6 +97,10 @@ * block/blk-lib.c:blkdev_issue_discard() */ #define DA_EMULATE_TPWS 0 +/* Emulation for CompareAndWrite (AtomicTestandSet) by default */ +#define DA_EMULATE_CAW 1 +/* Emulation for 3rd Party Copy (ExtendedCopy) by default */ +#define DA_EMULATE_3PC 1 /* No Emulation for PSCSI by default */ #define DA_EMULATE_ALUA 0 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ @@ -158,6 +163,9 @@ enum se_cmd_flags_table { SCF_ALUA_NON_OPTIMIZED = 0x00008000, SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_ACK_KREF = 0x00040000, + SCF_COMPARE_AND_WRITE = 0x00080000, + SCF_COMPARE_AND_WRITE_POST = 0x00100000, + SCF_CMD_XCOPY_PASSTHROUGH = 0x00200000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ @@ -196,6 +204,7 @@ enum tcm_sense_reason_table { TCM_ADDRESS_OUT_OF_RANGE = R(0x11), TCM_OUT_OF_RESOURCES = R(0x12), TCM_PARAMETER_LIST_LENGTH_ERROR = R(0x13), + TCM_MISCOMPARE_VERIFY = R(0x14), #undef R }; @@ -415,6 +424,8 @@ struct se_cmd { enum dma_data_direction data_direction; /* For SAM Task Attribute */ int sam_task_attr; + /* Used for se_sess->sess_tag_pool */ + unsigned int map_tag; /* Transport protocol dependent state, see transport_state_table */ enum transport_state_table t_state; unsigned cmd_wait_set:1; @@ -444,11 +455,14 @@ struct se_cmd { struct kref cmd_kref; struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); - void (*transport_complete_callback)(struct se_cmd *); + sense_reason_t (*execute_rw)(struct se_cmd *, struct scatterlist *, + u32, enum dma_data_direction); + sense_reason_t (*transport_complete_callback)(struct se_cmd *); unsigned char *t_task_cdb; unsigned char __t_task_cdb[TCM_MAX_COMMAND_SIZE]; unsigned long long t_task_lba; + unsigned int t_task_nolb; unsigned int transport_state; #define CMD_T_ABORTED (1 << 0) #define CMD_T_ACTIVE (1 << 1) @@ -469,7 +483,9 @@ struct se_cmd { struct work_struct work; struct scatterlist *t_data_sg; + struct scatterlist *t_data_sg_orig; unsigned int t_data_nents; + unsigned int t_data_nents_orig; void *t_data_vmap; struct scatterlist *t_bidi_data_sg; unsigned int t_bidi_data_nents; @@ -536,6 +552,8 @@ struct se_session { struct list_head sess_wait_list; spinlock_t sess_cmd_lock; struct kref sess_kref; + void *sess_cmd_map; + struct percpu_ida sess_tag_pool; }; struct se_device; @@ -589,6 +607,8 @@ struct se_dev_attrib { int emulate_tas; int emulate_tpu; int emulate_tpws; + int emulate_caw; + int emulate_3pc; int enforce_pr_isids; int is_nonrot; int emulate_rest_reord; @@ -656,6 +676,7 @@ struct se_device { spinlock_t se_port_lock; spinlock_t se_tmr_lock; spinlock_t qf_cmd_lock; + struct semaphore caw_sem; /* Used for legacy SPC-2 reservationsa */ struct se_node_acl *dev_reserved_node_acl; /* Used for ALUA Logical Unit Group membership */ @@ -669,6 +690,7 @@ struct se_device { struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; + struct list_head g_dev_node; /* Pointer to associated SE HBA */ struct se_hba *se_hba; /* T10 Inquiry and VPD WWN Information */ diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 7a16178424f..882b650e32b 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -84,6 +84,9 @@ struct target_core_fabric_ops { }; struct se_session *transport_init_session(void); +int transport_alloc_session_tags(struct se_session *, unsigned int, + unsigned int); +struct se_session *transport_init_session_tags(unsigned int, unsigned int); void __transport_register_session(struct se_portal_group *, struct se_node_acl *, struct se_session *, void *); void transport_register_session(struct se_portal_group *, @@ -131,6 +134,7 @@ int core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t); void core_tmr_release_req(struct se_tmr_req *); int transport_generic_handle_tmr(struct se_cmd *); void transport_generic_request_failure(struct se_cmd *, sense_reason_t); +void __target_execute_cmd(struct se_cmd *); int transport_lookup_tmr_lun(struct se_cmd *, u32); struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *, @@ -175,4 +179,30 @@ u32 iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl * char *iscsi_parse_pr_out_transport_id(struct se_portal_group *, const char *, u32 *, char **); +/* + * The LIO target core uses DMA_TO_DEVICE to mean that data is going + * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean + * that data is coming from the target (eg handling a READ). However, + * this is just the opposite of what we have to tell the DMA mapping + * layer -- eg when handling a READ, the HBA will have to DMA the data + * out of memory so it can send it to the initiator, which means we + * need to use DMA_TO_DEVICE when we map the data. + */ +static inline enum dma_data_direction +target_reverse_dma_direction(struct se_cmd *se_cmd) +{ + if (se_cmd->se_cmd_flags & SCF_BIDI) + return DMA_BIDIRECTIONAL; + + switch (se_cmd->data_direction) { + case DMA_TO_DEVICE: + return DMA_FROM_DEVICE; + case DMA_FROM_DEVICE: + return DMA_TO_DEVICE; + case DMA_NONE: + default: + return DMA_NONE; + } +} + #endif /* TARGET_CORE_FABRICH */ diff --git a/lib/Makefile b/lib/Makefile index f2cb3082697..f3bb2cb98ad 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o percpu-refcount.o + earlycpio.o percpu-refcount.o percpu_ida.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o @@ -25,7 +25,8 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ - bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o + bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ + percpu_ida.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c new file mode 100644 index 00000000000..bab1ba2a4c7 --- /dev/null +++ b/lib/percpu_ida.c @@ -0,0 +1,335 @@ +/* + * Percpu IDA library + * + * Copyright (C) 2013 Datera, Inc. Kent Overstreet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/hardirq.h> +#include <linux/idr.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/percpu_ida.h> + +/* + * Number of tags we move between the percpu freelist and the global freelist at + * a time + */ +#define IDA_PCPU_BATCH_MOVE 32U + +/* Max size of percpu freelist, */ +#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2) + +struct percpu_ida_cpu { + /* + * Even though this is percpu, we need a lock for tag stealing by remote + * CPUs: + */ + spinlock_t lock; + + /* nr_free/freelist form a stack of free IDs */ + unsigned nr_free; + unsigned freelist[]; +}; + +static inline void move_tags(unsigned *dst, unsigned *dst_nr, + unsigned *src, unsigned *src_nr, + unsigned nr) +{ + *src_nr -= nr; + memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr); + *dst_nr += nr; +} + +/* + * Try to steal tags from a remote cpu's percpu freelist. + * + * We first check how many percpu freelists have tags - we don't steal tags + * unless enough percpu freelists have tags on them that it's possible more than + * half the total tags could be stuck on remote percpu freelists. + * + * Then we iterate through the cpus until we find some tags - we don't attempt + * to find the "best" cpu to steal from, to keep cacheline bouncing to a + * minimum. + */ +static inline void steal_tags(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + unsigned cpus_have_tags, cpu = pool->cpu_last_stolen; + struct percpu_ida_cpu *remote; + + for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); + cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; + cpus_have_tags--) { + cpu = cpumask_next(cpu, &pool->cpus_have_tags); + + if (cpu >= nr_cpu_ids) { + cpu = cpumask_first(&pool->cpus_have_tags); + if (cpu >= nr_cpu_ids) + BUG(); + } + + pool->cpu_last_stolen = cpu; + remote = per_cpu_ptr(pool->tag_cpu, cpu); + + cpumask_clear_cpu(cpu, &pool->cpus_have_tags); + + if (remote == tags) + continue; + + spin_lock(&remote->lock); + + if (remote->nr_free) { + memcpy(tags->freelist, + remote->freelist, + sizeof(unsigned) * remote->nr_free); + + tags->nr_free = remote->nr_free; + remote->nr_free = 0; + } + + spin_unlock(&remote->lock); + + if (tags->nr_free) + break; + } +} + +/* + * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto + * our percpu freelist: + */ +static inline void alloc_global_tags(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + move_tags(tags->freelist, &tags->nr_free, + pool->freelist, &pool->nr_free, + min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); +} + +static inline unsigned alloc_local_tag(struct percpu_ida *pool, + struct percpu_ida_cpu *tags) +{ + int tag = -ENOSPC; + + spin_lock(&tags->lock); + if (tags->nr_free) + tag = tags->freelist[--tags->nr_free]; + spin_unlock(&tags->lock); + + return tag; +} + +/** + * percpu_ida_alloc - allocate a tag + * @pool: pool to allocate from + * @gfp: gfp flags + * + * Returns a tag - an integer in the range [0..nr_tags) (passed to + * tag_pool_init()), or otherwise -ENOSPC on allocation failure. + * + * Safe to be called from interrupt context (assuming it isn't passed + * __GFP_WAIT, of course). + * + * @gfp indicates whether or not to wait until a free id is available (it's not + * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep + * however long it takes until another thread frees an id (same semantics as a + * mempool). + * + * Will not fail if passed __GFP_WAIT. + */ +int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) +{ + DEFINE_WAIT(wait); + struct percpu_ida_cpu *tags; + unsigned long flags; + int tag; + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + + /* Fastpath */ + tag = alloc_local_tag(pool, tags); + if (likely(tag >= 0)) { + local_irq_restore(flags); + return tag; + } + + while (1) { + spin_lock(&pool->lock); + + /* + * prepare_to_wait() must come before steal_tags(), in case + * percpu_ida_free() on another cpu flips a bit in + * cpus_have_tags + * + * global lock held and irqs disabled, don't need percpu lock + */ + prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + + if (!tags->nr_free) + alloc_global_tags(pool, tags); + if (!tags->nr_free) + steal_tags(pool, tags); + + if (tags->nr_free) { + tag = tags->freelist[--tags->nr_free]; + if (tags->nr_free) + cpumask_set_cpu(smp_processor_id(), + &pool->cpus_have_tags); + } + + spin_unlock(&pool->lock); + local_irq_restore(flags); + + if (tag >= 0 || !(gfp & __GFP_WAIT)) + break; + + schedule(); + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + } + + finish_wait(&pool->wait, &wait); + return tag; +} +EXPORT_SYMBOL_GPL(percpu_ida_alloc); + +/** + * percpu_ida_free - free a tag + * @pool: pool @tag was allocated from + * @tag: a tag previously allocated with percpu_ida_alloc() + * + * Safe to be called from interrupt context. + */ +void percpu_ida_free(struct percpu_ida *pool, unsigned tag) +{ + struct percpu_ida_cpu *tags; + unsigned long flags; + unsigned nr_free; + + BUG_ON(tag >= pool->nr_tags); + + local_irq_save(flags); + tags = this_cpu_ptr(pool->tag_cpu); + + spin_lock(&tags->lock); + tags->freelist[tags->nr_free++] = tag; + + nr_free = tags->nr_free; + spin_unlock(&tags->lock); + + if (nr_free == 1) { + cpumask_set_cpu(smp_processor_id(), + &pool->cpus_have_tags); + wake_up(&pool->wait); + } + + if (nr_free == IDA_PCPU_SIZE) { + spin_lock(&pool->lock); + + /* + * Global lock held and irqs disabled, don't need percpu + * lock + */ + if (tags->nr_free == IDA_PCPU_SIZE) { + move_tags(pool->freelist, &pool->nr_free, + tags->freelist, &tags->nr_free, + IDA_PCPU_BATCH_MOVE); + + wake_up(&pool->wait); + } + spin_unlock(&pool->lock); + } + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(percpu_ida_free); + +/** + * percpu_ida_destroy - release a tag pool's resources + * @pool: pool to free + * + * Frees the resources allocated by percpu_ida_init(). + */ +void percpu_ida_destroy(struct percpu_ida *pool) +{ + free_percpu(pool->tag_cpu); + free_pages((unsigned long) pool->freelist, + get_order(pool->nr_tags * sizeof(unsigned))); +} +EXPORT_SYMBOL_GPL(percpu_ida_destroy); + +/** + * percpu_ida_init - initialize a percpu tag pool + * @pool: pool to initialize + * @nr_tags: number of tags that will be available for allocation + * + * Initializes @pool so that it can be used to allocate tags - integers in the + * range [0, nr_tags). Typically, they'll be used by driver code to refer to a + * preallocated array of tag structures. + * + * Allocation is percpu, but sharding is limited by nr_tags - for best + * performance, the workload should not span more cpus than nr_tags / 128. + */ +int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) +{ + unsigned i, cpu, order; + + memset(pool, 0, sizeof(*pool)); + + init_waitqueue_head(&pool->wait); + spin_lock_init(&pool->lock); + pool->nr_tags = nr_tags; + + /* Guard against overflow */ + if (nr_tags > (unsigned) INT_MAX + 1) { + pr_err("percpu_ida_init(): nr_tags too large\n"); + return -EINVAL; + } + + order = get_order(nr_tags * sizeof(unsigned)); + pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order); + if (!pool->freelist) + return -ENOMEM; + + for (i = 0; i < nr_tags; i++) + pool->freelist[i] = i; + + pool->nr_free = nr_tags; + + pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + + IDA_PCPU_SIZE * sizeof(unsigned), + sizeof(unsigned)); + if (!pool->tag_cpu) + goto err; + + for_each_possible_cpu(cpu) + spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock); + + return 0; +err: + percpu_ida_destroy(pool); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(percpu_ida_init); |