diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2013-11-14 17:38:05 -0800 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2013-11-14 17:38:05 -0800 |
commit | 42249094f79422fbf5ed4b54eeb48ff096809b8f (patch) | |
tree | 91e6850c8c7e8cc284cf8bb6363f8662f84011f4 /drivers/vhost | |
parent | 936816161978ca716a56c5e553c68f25972b1e3a (diff) | |
parent | 2c027b7c48a888ab173ba45babb4525e278375d9 (diff) |
Merge branch 'next' into for-linus
Merge first round of changes for 3.13 merge window.
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/Kconfig | 8 | ||||
-rw-r--r-- | drivers/vhost/Makefile | 3 | ||||
-rw-r--r-- | drivers/vhost/net.c | 167 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 656 | ||||
-rw-r--r-- | drivers/vhost/test.c | 39 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 154 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 13 |
7 files changed, 590 insertions, 450 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 8b9226da3f5..017a1e8a8f6 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,7 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + select VHOST select VHOST_RING ---help--- This kernel module can be loaded in host kernel to accelerate @@ -13,6 +14,7 @@ config VHOST_NET config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" depends on TARGET_CORE && EVENTFD && m + select VHOST select VHOST_RING default n ---help--- @@ -24,3 +26,9 @@ config VHOST_RING ---help--- This option is selected by any driver which needs to access the host side of a virtio ring. + +config VHOST + tristate + ---help--- + This option is selected by any driver which needs to access + the core of vhost. diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index 654e9afb11f..e0441c34db1 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -1,7 +1,8 @@ obj-$(CONFIG_VHOST_NET) += vhost_net.o -vhost_net-y := vhost.o net.o +vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o obj-$(CONFIG_VHOST_RING) += vringh.o +obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2b51e2336aa..831eb4fd197 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -15,7 +15,6 @@ #include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/workqueue.h> -#include <linux/rcupdate.h> #include <linux/file.h> #include <linux/slab.h> @@ -150,28 +149,30 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) { kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal); wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); +} + +static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) +{ + vhost_net_ubuf_put_and_wait(ubufs); kfree(ubufs); } static void vhost_net_clear_ubuf_info(struct vhost_net *n) { - - bool zcopy; int i; - for (i = 0; i < n->dev.nvqs; ++i) { - zcopy = vhost_net_zcopy_mask & (0x1 << i); - if (zcopy) - kfree(n->vqs[i].ubuf_info); + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { + kfree(n->vqs[i].ubuf_info); + n->vqs[i].ubuf_info = NULL; } } -int vhost_net_set_ubuf_info(struct vhost_net *n) +static int vhost_net_set_ubuf_info(struct vhost_net *n) { bool zcopy; int i; - for (i = 0; i < n->dev.nvqs; ++i) { + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { zcopy = vhost_net_zcopy_mask & (0x1 << i); if (!zcopy) continue; @@ -183,25 +184,20 @@ int vhost_net_set_ubuf_info(struct vhost_net *n) return 0; err: - while (i--) { - zcopy = vhost_net_zcopy_mask & (0x1 << i); - if (!zcopy) - continue; - kfree(n->vqs[i].ubuf_info); - } + vhost_net_clear_ubuf_info(n); return -ENOMEM; } -void vhost_net_vq_reset(struct vhost_net *n) +static void vhost_net_vq_reset(struct vhost_net *n) { int i; + vhost_net_clear_ubuf_info(n); + for (i = 0; i < VHOST_NET_VQ_MAX; i++) { n->vqs[i].done_idx = 0; n->vqs[i].upend_idx = 0; n->vqs[i].ubufs = NULL; - kfree(n->vqs[i].ubuf_info); - n->vqs[i].ubuf_info = NULL; n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; } @@ -280,12 +276,12 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, * of used idx. Once lower device DMA done contiguously, we will signal KVM * guest used idx. */ -static int vhost_zerocopy_signal_used(struct vhost_net *net, - struct vhost_virtqueue *vq) +static void vhost_zerocopy_signal_used(struct vhost_net *net, + struct vhost_virtqueue *vq) { struct vhost_net_virtqueue *nvq = container_of(vq, struct vhost_net_virtqueue, vq); - int i; + int i, add; int j = 0; for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { @@ -293,15 +289,17 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net, vhost_net_tx_err(net); if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { vq->heads[i].len = VHOST_DMA_CLEAR_LEN; - vhost_add_used_and_signal(vq->dev, vq, - vq->heads[i].id, 0); ++j; } else break; } - if (j) - nvq->done_idx = i; - return j; + while (j) { + add = min(UIO_MAXIOV - nvq->done_idx, j); + vhost_add_used_and_signal_n(vq->dev, vq, + &vq->heads[nvq->done_idx], add); + nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV; + j -= add; + } } static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) @@ -310,6 +308,11 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) struct vhost_virtqueue *vq = ubufs->vq; int cnt = atomic_read(&ubufs->kref.refcount); + /* set len to mark this desc buffers done DMA */ + vq->heads[ubuf->desc].len = success ? + VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; + vhost_net_ubuf_put(ubufs); + /* * Trigger polling thread if guest stopped submitting new buffers: * in this case, the refcount after decrement will eventually reach 1 @@ -320,10 +323,6 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) */ if (cnt <= 2 || !(cnt % 16)) vhost_poll_queue(&vq->poll); - /* set len to mark this desc buffers done DMA */ - vq->heads[ubuf->desc].len = success ? - VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; - vhost_net_ubuf_put(ubufs); } /* Expects to be always run from workqueue - which acts as @@ -349,12 +348,11 @@ static void handle_tx(struct vhost_net *net) struct vhost_net_ubuf_ref *uninitialized_var(ubufs); bool zcopy, zcopy_used; - /* TODO: check that we are running from vhost_worker? */ - sock = rcu_dereference_check(vq->private_data, 1); + mutex_lock(&vq->mutex); + sock = vq->private_data; if (!sock) - return; + goto out; - mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); hdr_size = nvq->vhost_hlen; @@ -365,6 +363,13 @@ static void handle_tx(struct vhost_net *net) if (zcopy) vhost_zerocopy_signal_used(net, vq); + /* If more outstanding DMAs, queue the work. + * Handle upend_idx wrap around + */ + if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND) + % UIO_MAXIOV == nvq->done_idx)) + break; + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, @@ -374,17 +379,6 @@ static void handle_tx(struct vhost_net *net) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - int num_pends; - - /* If more outstanding DMAs, queue the work. - * Handle upend_idx wrap around - */ - num_pends = likely(nvq->upend_idx >= nvq->done_idx) ? - (nvq->upend_idx - nvq->done_idx) : - (nvq->upend_idx + UIO_MAXIOV - - nvq->done_idx); - if (unlikely(num_pends > VHOST_MAX_PEND)) - break; if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; @@ -407,42 +401,36 @@ static void handle_tx(struct vhost_net *net) iov_length(nvq->hdr, s), hdr_size); break; } - zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || - nvq->upend_idx != nvq->done_idx); + + zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN + && (nvq->upend_idx + 1) % UIO_MAXIOV != + nvq->done_idx + && vhost_net_tx_select_zcopy(net); /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { + struct ubuf_info *ubuf; + ubuf = nvq->ubuf_info + nvq->upend_idx; + vq->heads[nvq->upend_idx].id = head; - if (!vhost_net_tx_select_zcopy(net) || - len < VHOST_GOODCOPY_LEN) { - /* copy don't need to wait for DMA done */ - vq->heads[nvq->upend_idx].len = - VHOST_DMA_DONE_LEN; - msg.msg_control = NULL; - msg.msg_controllen = 0; - ubufs = NULL; - } else { - struct ubuf_info *ubuf; - ubuf = nvq->ubuf_info + nvq->upend_idx; - - vq->heads[nvq->upend_idx].len = - VHOST_DMA_IN_PROGRESS; - ubuf->callback = vhost_zerocopy_callback; - ubuf->ctx = nvq->ubufs; - ubuf->desc = nvq->upend_idx; - msg.msg_control = ubuf; - msg.msg_controllen = sizeof(ubuf); - ubufs = nvq->ubufs; - kref_get(&ubufs->kref); - } + vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; + ubuf->callback = vhost_zerocopy_callback; + ubuf->ctx = nvq->ubufs; + ubuf->desc = nvq->upend_idx; + msg.msg_control = ubuf; + msg.msg_controllen = sizeof(ubuf); + ubufs = nvq->ubufs; + kref_get(&ubufs->kref); nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; + } else { + msg.msg_control = NULL; + ubufs = NULL; } /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(NULL, sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { - if (ubufs) - vhost_net_ubuf_put(ubufs); + vhost_net_ubuf_put(ubufs); nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; } @@ -463,7 +451,7 @@ static void handle_tx(struct vhost_net *net) break; } } - +out: mutex_unlock(&vq->mutex); } @@ -572,14 +560,14 @@ static void handle_rx(struct vhost_net *net) s16 headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; - /* TODO: check that we are running from vhost_worker? */ - struct socket *sock = rcu_dereference_check(vq->private_data, 1); - - if (!sock) - return; + struct socket *sock; mutex_lock(&vq->mutex); + sock = vq->private_data; + if (!sock) + goto out; vhost_disable_notify(&net->dev, vq); + vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; @@ -654,7 +642,7 @@ static void handle_rx(struct vhost_net *net) break; } } - +out: mutex_unlock(&vq->mutex); } @@ -752,8 +740,7 @@ static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; - sock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + sock = vq->private_data; if (!sock) return 0; @@ -766,10 +753,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct socket *sock; mutex_lock(&vq->mutex); - sock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + sock = vq->private_data; vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, NULL); + vq->private_data = NULL; mutex_unlock(&vq->mutex); return sock; } @@ -925,8 +911,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } /* start polling new socket */ - oldsock = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); + oldsock = vq->private_data; if (sock != oldsock) { ubufs = vhost_net_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock)); @@ -936,7 +921,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, sock); + vq->private_data = sock; r = vhost_init_used(vq); if (r) goto err_used; @@ -955,7 +940,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) mutex_unlock(&vq->mutex); if (oldubufs) { - vhost_net_ubuf_put_and_wait(oldubufs); + vhost_net_ubuf_put_wait_and_free(oldubufs); mutex_lock(&vq->mutex); vhost_zerocopy_signal_used(n, vq); mutex_unlock(&vq->mutex); @@ -970,10 +955,10 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) return 0; err_used: - rcu_assign_pointer(vq->private_data, oldsock); + vq->private_data = oldsock; vhost_net_enable_vq(n, vq); if (ubufs) - vhost_net_ubuf_put_and_wait(ubufs); + vhost_net_ubuf_put_wait_and_free(ubufs); err_ubufs: fput(sock->file); err_vq: @@ -1053,6 +1038,10 @@ static long vhost_net_set_owner(struct vhost_net *n) int r; mutex_lock(&n->dev.mutex); + if (vhost_dev_has_owner(&n->dev)) { + r = -EBUSY; + goto out; + } r = vhost_net_set_ubuf_info(n); if (r) goto out; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 70142029722..ce5221fa393 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1,12 +1,12 @@ /******************************************************************************* * Vhost kernel TCM fabric driver for virtio SCSI initiators * - * (C) Copyright 2010-2012 RisingTide Systems LLC. + * (C) Copyright 2010-2013 Datera, Inc. * (C) Copyright 2010-2012 IBM Corp. * * Licensed to the Linux Foundation under the General Public License (GPL) version 2. * - * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com> + * Authors: Nicholas A. Bellinger <nab@daterainc.com> * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> * * This program is free software; you can redistribute it and/or modify @@ -48,13 +48,16 @@ #include <linux/virtio_scsi.h> #include <linux/llist.h> #include <linux/bitmap.h> +#include <linux/percpu_ida.h> -#include "vhost.c" #include "vhost.h" #define TCM_VHOST_VERSION "v0.1" #define TCM_VHOST_NAMELEN 256 #define TCM_VHOST_MAX_CDB_SIZE 32 +#define TCM_VHOST_DEFAULT_TAGS 256 +#define TCM_VHOST_PREALLOC_SGLS 2048 +#define TCM_VHOST_PREALLOC_PAGES 2048 struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ @@ -80,6 +83,7 @@ struct tcm_vhost_cmd { u32 tvc_lun; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; + struct page **tvc_upages; /* Pointer to response */ struct virtio_scsi_cmd_resp __user *tvc_resp; /* Pointer to vhost_scsi for our device */ @@ -116,7 +120,6 @@ struct tcm_vhost_nacl { struct se_node_acl se_node_acl; }; -struct vhost_scsi; struct tcm_vhost_tpg { /* Vhost port target portal group tag for TCM */ u16 tport_tpgt; @@ -218,7 +221,7 @@ static int iov_num_pages(struct iovec *iov) ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT; } -void tcm_vhost_done_inflight(struct kref *kref) +static void tcm_vhost_done_inflight(struct kref *kref) { struct vhost_scsi_inflight *inflight; @@ -329,11 +332,12 @@ static u32 tcm_vhost_get_default_depth(struct se_portal_group *se_tpg) return 1; } -static u32 tcm_vhost_get_pr_transport_id(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code, - unsigned char *buf) +static u32 +tcm_vhost_get_pr_transport_id(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl, + struct t10_pr_registration *pr_reg, + int *format_code, + unsigned char *buf) { struct tcm_vhost_tpg *tpg = container_of(se_tpg, struct tcm_vhost_tpg, se_tpg); @@ -359,10 +363,11 @@ static u32 tcm_vhost_get_pr_transport_id(struct se_portal_group *se_tpg, format_code, buf); } -static u32 tcm_vhost_get_pr_transport_id_len(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl, - struct t10_pr_registration *pr_reg, - int *format_code) +static u32 +tcm_vhost_get_pr_transport_id_len(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl, + struct t10_pr_registration *pr_reg, + int *format_code) { struct tcm_vhost_tpg *tpg = container_of(se_tpg, struct tcm_vhost_tpg, se_tpg); @@ -388,10 +393,11 @@ static u32 tcm_vhost_get_pr_transport_id_len(struct se_portal_group *se_tpg, format_code); } -static char *tcm_vhost_parse_pr_out_transport_id(struct se_portal_group *se_tpg, - const char *buf, - u32 *out_tid_len, - char **port_nexus_ptr) +static char * +tcm_vhost_parse_pr_out_transport_id(struct se_portal_group *se_tpg, + const char *buf, + u32 *out_tid_len, + char **port_nexus_ptr) { struct tcm_vhost_tpg *tpg = container_of(se_tpg, struct tcm_vhost_tpg, se_tpg); @@ -417,8 +423,8 @@ static char *tcm_vhost_parse_pr_out_transport_id(struct se_portal_group *se_tpg, port_nexus_ptr); } -static struct se_node_acl *tcm_vhost_alloc_fabric_acl( - struct se_portal_group *se_tpg) +static struct se_node_acl * +tcm_vhost_alloc_fabric_acl(struct se_portal_group *se_tpg) { struct tcm_vhost_nacl *nacl; @@ -431,8 +437,9 @@ static struct se_node_acl *tcm_vhost_alloc_fabric_acl( return &nacl->se_node_acl; } -static void tcm_vhost_release_fabric_acl(struct se_portal_group *se_tpg, - struct se_node_acl *se_nacl) +static void +tcm_vhost_release_fabric_acl(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl) { struct tcm_vhost_nacl *nacl = container_of(se_nacl, struct tcm_vhost_nacl, se_node_acl); @@ -446,7 +453,18 @@ static u32 tcm_vhost_tpg_get_inst_index(struct se_portal_group *se_tpg) static void tcm_vhost_release_cmd(struct se_cmd *se_cmd) { - return; + struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, + struct tcm_vhost_cmd, tvc_se_cmd); + struct se_session *se_sess = se_cmd->se_sess; + + if (tv_cmd->tvc_sgl_count) { + u32 i; + for (i = 0; i < tv_cmd->tvc_sgl_count; i++) + put_page(sg_page(&tv_cmd->tvc_sgl[i])); + } + + tcm_vhost_put_inflight(tv_cmd->inflight); + percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); } static int tcm_vhost_shutdown_session(struct se_session *se_sess) @@ -491,34 +509,34 @@ static int tcm_vhost_get_cmd_state(struct se_cmd *se_cmd) return 0; } -static void vhost_scsi_complete_cmd(struct tcm_vhost_cmd *tv_cmd) +static void vhost_scsi_complete_cmd(struct tcm_vhost_cmd *cmd) { - struct vhost_scsi *vs = tv_cmd->tvc_vhost; + struct vhost_scsi *vs = cmd->tvc_vhost; - llist_add(&tv_cmd->tvc_completion_list, &vs->vs_completion_list); + llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list); vhost_work_queue(&vs->dev, &vs->vs_completion_work); } static int tcm_vhost_queue_data_in(struct se_cmd *se_cmd) { - struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, + struct tcm_vhost_cmd *cmd = container_of(se_cmd, struct tcm_vhost_cmd, tvc_se_cmd); - vhost_scsi_complete_cmd(tv_cmd); + vhost_scsi_complete_cmd(cmd); return 0; } static int tcm_vhost_queue_status(struct se_cmd *se_cmd) { - struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, + struct tcm_vhost_cmd *cmd = container_of(se_cmd, struct tcm_vhost_cmd, tvc_se_cmd); - vhost_scsi_complete_cmd(tv_cmd); + vhost_scsi_complete_cmd(cmd); return 0; } -static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd) +static void tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd) { - return 0; + return; } static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) @@ -527,8 +545,9 @@ static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) kfree(evt); } -static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, - u32 event, u32 reason) +static struct tcm_vhost_evt * +tcm_vhost_allocate_evt(struct vhost_scsi *vs, + u32 event, u32 reason) { struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct tcm_vhost_evt *evt; @@ -552,28 +571,22 @@ static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, return evt; } -static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) +static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *cmd) { - struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd; + struct se_cmd *se_cmd = &cmd->tvc_se_cmd; /* TODO locking against target/backend threads? */ - transport_generic_free_cmd(se_cmd, 1); + transport_generic_free_cmd(se_cmd, 0); - if (tv_cmd->tvc_sgl_count) { - u32 i; - for (i = 0; i < tv_cmd->tvc_sgl_count; i++) - put_page(sg_page(&tv_cmd->tvc_sgl[i])); - - kfree(tv_cmd->tvc_sgl); - } - - tcm_vhost_put_inflight(tv_cmd->inflight); +} - kfree(tv_cmd); +static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd) +{ + return target_put_sess_cmd(se_cmd->se_sess, se_cmd); } -static void tcm_vhost_do_evt_work(struct vhost_scsi *vs, - struct tcm_vhost_evt *evt) +static void +tcm_vhost_do_evt_work(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) { struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct virtio_scsi_event *event = &evt->event; @@ -652,7 +665,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vs_completion_work); DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ); struct virtio_scsi_cmd_resp v_rsp; - struct tcm_vhost_cmd *tv_cmd; + struct tcm_vhost_cmd *cmd; struct llist_node *llnode; struct se_cmd *se_cmd; int ret, vq; @@ -660,32 +673,32 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) bitmap_zero(signal, VHOST_SCSI_MAX_VQ); llnode = llist_del_all(&vs->vs_completion_list); while (llnode) { - tv_cmd = llist_entry(llnode, struct tcm_vhost_cmd, + cmd = llist_entry(llnode, struct tcm_vhost_cmd, tvc_completion_list); llnode = llist_next(llnode); - se_cmd = &tv_cmd->tvc_se_cmd; + se_cmd = &cmd->tvc_se_cmd; pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__, - tv_cmd, se_cmd->residual_count, se_cmd->scsi_status); + cmd, se_cmd->residual_count, se_cmd->scsi_status); memset(&v_rsp, 0, sizeof(v_rsp)); v_rsp.resid = se_cmd->residual_count; /* TODO is status_qualifier field needed? */ v_rsp.status = se_cmd->scsi_status; v_rsp.sense_len = se_cmd->scsi_sense_length; - memcpy(v_rsp.sense, tv_cmd->tvc_sense_buf, + memcpy(v_rsp.sense, cmd->tvc_sense_buf, v_rsp.sense_len); - ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); + ret = copy_to_user(cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); if (likely(ret == 0)) { struct vhost_scsi_virtqueue *q; - vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0); - q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); + vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); + q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); vq = q - vs->vqs; __set_bit(vq, signal); } else pr_err("Faulted on virtio_scsi_cmd_resp\n"); - vhost_scsi_free_cmd(tv_cmd); + vhost_scsi_free_cmd(cmd); } vq = -1; @@ -694,35 +707,49 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vhost_signal(&vs->dev, &vs->vqs[vq].vq); } -static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( - struct vhost_virtqueue *vq, - struct tcm_vhost_tpg *tv_tpg, - struct virtio_scsi_cmd_req *v_req, - u32 exp_data_len, - int data_direction) +static struct tcm_vhost_cmd * +vhost_scsi_get_tag(struct vhost_virtqueue *vq, + struct tcm_vhost_tpg *tpg, + struct virtio_scsi_cmd_req *v_req, + u32 exp_data_len, + int data_direction) { - struct tcm_vhost_cmd *tv_cmd; + struct tcm_vhost_cmd *cmd; struct tcm_vhost_nexus *tv_nexus; + struct se_session *se_sess; + struct scatterlist *sg; + struct page **pages; + int tag; - tv_nexus = tv_tpg->tpg_nexus; + tv_nexus = tpg->tpg_nexus; if (!tv_nexus) { pr_err("Unable to locate active struct tcm_vhost_nexus\n"); return ERR_PTR(-EIO); } + se_sess = tv_nexus->tvn_se_sess; - tv_cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC); - if (!tv_cmd) { - pr_err("Unable to allocate struct tcm_vhost_cmd\n"); + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC); + if (tag < 0) { + pr_err("Unable to obtain tag for tcm_vhost_cmd\n"); return ERR_PTR(-ENOMEM); } - tv_cmd->tvc_tag = v_req->tag; - tv_cmd->tvc_task_attr = v_req->task_attr; - tv_cmd->tvc_exp_data_len = exp_data_len; - tv_cmd->tvc_data_direction = data_direction; - tv_cmd->tvc_nexus = tv_nexus; - tv_cmd->inflight = tcm_vhost_get_inflight(vq); - return tv_cmd; + cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag]; + sg = cmd->tvc_sgl; + pages = cmd->tvc_upages; + memset(cmd, 0, sizeof(struct tcm_vhost_cmd)); + + cmd->tvc_sgl = sg; + cmd->tvc_upages = pages; + cmd->tvc_se_cmd.map_tag = tag; + cmd->tvc_tag = v_req->tag; + cmd->tvc_task_attr = v_req->task_attr; + cmd->tvc_exp_data_len = exp_data_len; + cmd->tvc_data_direction = data_direction; + cmd->tvc_nexus = tv_nexus; + cmd->inflight = tcm_vhost_get_inflight(vq); + + return cmd; } /* @@ -730,8 +757,12 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( * * Returns the number of scatterlist entries used or -errno on error. */ -static int vhost_scsi_map_to_sgl(struct scatterlist *sgl, - unsigned int sgl_count, struct iovec *iov, int write) +static int +vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd, + struct scatterlist *sgl, + unsigned int sgl_count, + struct iovec *iov, + int write) { unsigned int npages = 0, pages_nr, offset, nbytes; struct scatterlist *sg = sgl; @@ -740,13 +771,25 @@ static int vhost_scsi_map_to_sgl(struct scatterlist *sgl, struct page **pages; int ret, i; + if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { + pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than" + " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", + sgl_count, TCM_VHOST_PREALLOC_SGLS); + return -ENOBUFS; + } + pages_nr = iov_num_pages(iov); if (pages_nr > sgl_count) return -ENOBUFS; - pages = kmalloc(pages_nr * sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; + if (pages_nr > TCM_VHOST_PREALLOC_PAGES) { + pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" + " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n", + pages_nr, TCM_VHOST_PREALLOC_PAGES); + return -ENOBUFS; + } + + pages = tv_cmd->tvc_upages; ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); /* No pages were pinned */ @@ -771,12 +814,14 @@ static int vhost_scsi_map_to_sgl(struct scatterlist *sgl, } out: - kfree(pages); return ret; } -static int vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *tv_cmd, - struct iovec *iov, unsigned int niov, int write) +static int +vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, + struct iovec *iov, + unsigned int niov, + int write) { int ret; unsigned int i; @@ -792,25 +837,21 @@ static int vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *tv_cmd, /* TODO overflow checking */ - sg = kmalloc(sizeof(tv_cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC); - if (!sg) - return -ENOMEM; - pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__, - sg, sgl_count, !sg); + sg = cmd->tvc_sgl; + pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count); sg_init_table(sg, sgl_count); - tv_cmd->tvc_sgl = sg; - tv_cmd->tvc_sgl_count = sgl_count; + cmd->tvc_sgl_count = sgl_count; pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count); for (i = 0; i < niov; i++) { - ret = vhost_scsi_map_to_sgl(sg, sgl_count, &iov[i], write); + ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i], + write); if (ret < 0) { - for (i = 0; i < tv_cmd->tvc_sgl_count; i++) - put_page(sg_page(&tv_cmd->tvc_sgl[i])); - kfree(tv_cmd->tvc_sgl); - tv_cmd->tvc_sgl = NULL; - tv_cmd->tvc_sgl_count = 0; + for (i = 0; i < cmd->tvc_sgl_count; i++) + put_page(sg_page(&cmd->tvc_sgl[i])); + + cmd->tvc_sgl_count = 0; return ret; } @@ -822,15 +863,15 @@ static int vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *tv_cmd, static void tcm_vhost_submission_work(struct work_struct *work) { - struct tcm_vhost_cmd *tv_cmd = + struct tcm_vhost_cmd *cmd = container_of(work, struct tcm_vhost_cmd, work); struct tcm_vhost_nexus *tv_nexus; - struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd; + struct se_cmd *se_cmd = &cmd->tvc_se_cmd; struct scatterlist *sg_ptr, *sg_bidi_ptr = NULL; int rc, sg_no_bidi = 0; - if (tv_cmd->tvc_sgl_count) { - sg_ptr = tv_cmd->tvc_sgl; + if (cmd->tvc_sgl_count) { + sg_ptr = cmd->tvc_sgl; /* FIXME: Fix BIDI operation in tcm_vhost_submission_work() */ #if 0 if (se_cmd->se_cmd_flags & SCF_BIDI) { @@ -841,13 +882,13 @@ static void tcm_vhost_submission_work(struct work_struct *work) } else { sg_ptr = NULL; } - tv_nexus = tv_cmd->tvc_nexus; + tv_nexus = cmd->tvc_nexus; rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess, - tv_cmd->tvc_cdb, &tv_cmd->tvc_sense_buf[0], - tv_cmd->tvc_lun, tv_cmd->tvc_exp_data_len, - tv_cmd->tvc_task_attr, tv_cmd->tvc_data_direction, - 0, sg_ptr, tv_cmd->tvc_sgl_count, + cmd->tvc_cdb, &cmd->tvc_sense_buf[0], + cmd->tvc_lun, cmd->tvc_exp_data_len, + cmd->tvc_task_attr, cmd->tvc_data_direction, + TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count, sg_bidi_ptr, sg_no_bidi); if (rc < 0) { transport_send_check_condition_and_sense(se_cmd, @@ -856,8 +897,10 @@ static void tcm_vhost_submission_work(struct work_struct *work) } } -static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, - struct vhost_virtqueue *vq, int head, unsigned out) +static void +vhost_scsi_send_bad_target(struct vhost_scsi *vs, + struct vhost_virtqueue *vq, + int head, unsigned out) { struct virtio_scsi_cmd_resp __user *resp; struct virtio_scsi_cmd_resp rsp; @@ -873,31 +916,27 @@ static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, pr_err("Faulted on virtio_scsi_cmd_resp\n"); } -static void vhost_scsi_handle_vq(struct vhost_scsi *vs, - struct vhost_virtqueue *vq) +static void +vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { struct tcm_vhost_tpg **vs_tpg; struct virtio_scsi_cmd_req v_req; - struct tcm_vhost_tpg *tv_tpg; - struct tcm_vhost_cmd *tv_cmd; + struct tcm_vhost_tpg *tpg; + struct tcm_vhost_cmd *cmd; u32 exp_data_len, data_first, data_num, data_direction; unsigned out, in, i; int head, ret; u8 target; + mutex_lock(&vq->mutex); /* * We can handle the vq only after the endpoint is setup by calling the * VHOST_SCSI_SET_ENDPOINT ioctl. - * - * TODO: Check that we are running from vhost_worker which acts - * as read-side critical section for vhost kind of RCU. - * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h */ - vs_tpg = rcu_dereference_check(vq->private_data, 1); + vs_tpg = vq->private_data; if (!vs_tpg) - return; + goto out; - mutex_lock(&vq->mutex); vhost_disable_notify(&vs->dev, vq); for (;;) { @@ -964,10 +1003,10 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, /* Extract the tpgt */ target = v_req.lun[1]; - tv_tpg = ACCESS_ONCE(vs_tpg[target]); + tpg = ACCESS_ONCE(vs_tpg[target]); /* Target does not exist, fail the request */ - if (unlikely(!tv_tpg)) { + if (unlikely(!tpg)) { vhost_scsi_send_bad_target(vs, vq, head, out); continue; } @@ -976,46 +1015,46 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, for (i = 0; i < data_num; i++) exp_data_len += vq->iov[data_first + i].iov_len; - tv_cmd = vhost_scsi_allocate_cmd(vq, tv_tpg, &v_req, - exp_data_len, data_direction); - if (IS_ERR(tv_cmd)) { - vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", - PTR_ERR(tv_cmd)); + cmd = vhost_scsi_get_tag(vq, tpg, &v_req, + exp_data_len, data_direction); + if (IS_ERR(cmd)) { + vq_err(vq, "vhost_scsi_get_tag failed %ld\n", + PTR_ERR(cmd)); goto err_cmd; } pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction" - ": %d\n", tv_cmd, exp_data_len, data_direction); + ": %d\n", cmd, exp_data_len, data_direction); - tv_cmd->tvc_vhost = vs; - tv_cmd->tvc_vq = vq; - tv_cmd->tvc_resp = vq->iov[out].iov_base; + cmd->tvc_vhost = vs; + cmd->tvc_vq = vq; + cmd->tvc_resp = vq->iov[out].iov_base; /* - * Copy in the recieved CDB descriptor into tv_cmd->tvc_cdb + * Copy in the recieved CDB descriptor into cmd->tvc_cdb * that will be used by tcm_vhost_new_cmd_map() and down into * target_setup_cmd_from_cdb() */ - memcpy(tv_cmd->tvc_cdb, v_req.cdb, TCM_VHOST_MAX_CDB_SIZE); + memcpy(cmd->tvc_cdb, v_req.cdb, TCM_VHOST_MAX_CDB_SIZE); /* * Check that the recieved CDB size does not exceeded our * hardcoded max for tcm_vhost */ /* TODO what if cdb was too small for varlen cdb header? */ - if (unlikely(scsi_command_size(tv_cmd->tvc_cdb) > + if (unlikely(scsi_command_size(cmd->tvc_cdb) > TCM_VHOST_MAX_CDB_SIZE)) { vq_err(vq, "Received SCSI CDB with command_size: %d that" " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", - scsi_command_size(tv_cmd->tvc_cdb), + scsi_command_size(cmd->tvc_cdb), TCM_VHOST_MAX_CDB_SIZE); goto err_free; } - tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; + cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", - tv_cmd->tvc_cdb[0], tv_cmd->tvc_lun); + cmd->tvc_cdb[0], cmd->tvc_lun); if (data_direction != DMA_NONE) { - ret = vhost_scsi_map_iov_to_sgl(tv_cmd, + ret = vhost_scsi_map_iov_to_sgl(cmd, &vq->iov[data_first], data_num, data_direction == DMA_TO_DEVICE); if (unlikely(ret)) { @@ -1029,24 +1068,25 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs, * complete the virtio-scsi request in TCM callback context via * tcm_vhost_queue_data_in() and tcm_vhost_queue_status() */ - tv_cmd->tvc_vq_desc = head; + cmd->tvc_vq_desc = head; /* * Dispatch tv_cmd descriptor for cmwq execution in process * context provided by tcm_vhost_workqueue. This also ensures * tv_cmd is executed on the same kworker CPU as this vhost * thread to gain positive L2 cache locality effects.. */ - INIT_WORK(&tv_cmd->work, tcm_vhost_submission_work); - queue_work(tcm_vhost_workqueue, &tv_cmd->work); + INIT_WORK(&cmd->work, tcm_vhost_submission_work); + queue_work(tcm_vhost_workqueue, &cmd->work); } mutex_unlock(&vq->mutex); return; err_free: - vhost_scsi_free_cmd(tv_cmd); + vhost_scsi_free_cmd(cmd); err_cmd: vhost_scsi_send_bad_target(vs, vq, head, out); +out: mutex_unlock(&vq->mutex); } @@ -1055,8 +1095,12 @@ static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) pr_debug("%s: The handling func for control queue.\n", __func__); } -static void tcm_vhost_send_evt(struct vhost_scsi *vs, struct tcm_vhost_tpg *tpg, - struct se_lun *lun, u32 event, u32 reason) +static void +tcm_vhost_send_evt(struct vhost_scsi *vs, + struct tcm_vhost_tpg *tpg, + struct se_lun *lun, + u32 event, + u32 reason) { struct tcm_vhost_evt *evt; @@ -1146,12 +1190,12 @@ static void vhost_scsi_flush(struct vhost_scsi *vs) * The lock nesting rule is: * tcm_vhost_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex */ -static int vhost_scsi_set_endpoint( - struct vhost_scsi *vs, - struct vhost_scsi_target *t) +static int +vhost_scsi_set_endpoint(struct vhost_scsi *vs, + struct vhost_scsi_target *t) { struct tcm_vhost_tport *tv_tport; - struct tcm_vhost_tpg *tv_tpg; + struct tcm_vhost_tpg *tpg; struct tcm_vhost_tpg **vs_tpg; struct vhost_virtqueue *vq; int index, ret, i, len; @@ -1178,32 +1222,32 @@ static int vhost_scsi_set_endpoint( if (vs->vs_tpg) memcpy(vs_tpg, vs->vs_tpg, len); - list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) { - mutex_lock(&tv_tpg->tv_tpg_mutex); - if (!tv_tpg->tpg_nexus) { - mutex_unlock(&tv_tpg->tv_tpg_mutex); + list_for_each_entry(tpg, &tcm_vhost_list, tv_tpg_list) { + mutex_lock(&tpg->tv_tpg_mutex); + if (!tpg->tpg_nexus) { + mutex_unlock(&tpg->tv_tpg_mutex); continue; } - if (tv_tpg->tv_tpg_vhost_count != 0) { - mutex_unlock(&tv_tpg->tv_tpg_mutex); + if (tpg->tv_tpg_vhost_count != 0) { + mutex_unlock(&tpg->tv_tpg_mutex); continue; } - tv_tport = tv_tpg->tport; + tv_tport = tpg->tport; if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { - if (vs->vs_tpg && vs->vs_tpg[tv_tpg->tport_tpgt]) { + if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) { kfree(vs_tpg); - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); ret = -EEXIST; goto out; } - tv_tpg->tv_tpg_vhost_count++; - tv_tpg->vhost_scsi = vs; - vs_tpg[tv_tpg->tport_tpgt] = tv_tpg; + tpg->tv_tpg_vhost_count++; + tpg->vhost_scsi = vs; + vs_tpg[tpg->tport_tpgt] = tpg; smp_mb__after_atomic_inc(); match = true; } - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); } if (match) { @@ -1211,9 +1255,8 @@ static int vhost_scsi_set_endpoint( sizeof(vs->vs_vhost_wwpn)); for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; - /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); - rcu_assign_pointer(vq->private_data, vs_tpg); + vq->private_data = vs_tpg; vhost_init_used(vq); mutex_unlock(&vq->mutex); } @@ -1236,12 +1279,12 @@ out: return ret; } -static int vhost_scsi_clear_endpoint( - struct vhost_scsi *vs, - struct vhost_scsi_target *t) +static int +vhost_scsi_clear_endpoint(struct vhost_scsi *vs, + struct vhost_scsi_target *t) { struct tcm_vhost_tport *tv_tport; - struct tcm_vhost_tpg *tv_tpg; + struct tcm_vhost_tpg *tpg; struct vhost_virtqueue *vq; bool match = false; int index, ret, i; @@ -1264,37 +1307,36 @@ static int vhost_scsi_clear_endpoint( for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { target = i; - tv_tpg = vs->vs_tpg[target]; - if (!tv_tpg) + tpg = vs->vs_tpg[target]; + if (!tpg) continue; - mutex_lock(&tv_tpg->tv_tpg_mutex); - tv_tport = tv_tpg->tport; + mutex_lock(&tpg->tv_tpg_mutex); + tv_tport = tpg->tport; if (!tv_tport) { ret = -ENODEV; goto err_tpg; } if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) { - pr_warn("tv_tport->tport_name: %s, tv_tpg->tport_tpgt: %hu" + pr_warn("tv_tport->tport_name: %s, tpg->tport_tpgt: %hu" " does not match t->vhost_wwpn: %s, t->vhost_tpgt: %hu\n", - tv_tport->tport_name, tv_tpg->tport_tpgt, + tv_tport->tport_name, tpg->tport_tpgt, t->vhost_wwpn, t->vhost_tpgt); ret = -EINVAL; goto err_tpg; } - tv_tpg->tv_tpg_vhost_count--; - tv_tpg->vhost_scsi = NULL; + tpg->tv_tpg_vhost_count--; + tpg->vhost_scsi = NULL; vs->vs_tpg[target] = NULL; match = true; - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { vq = &vs->vqs[i].vq; - /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); - rcu_assign_pointer(vq->private_data, NULL); + vq->private_data = NULL; mutex_unlock(&vq->mutex); } } @@ -1311,7 +1353,7 @@ static int vhost_scsi_clear_endpoint( return 0; err_tpg: - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); err_dev: mutex_unlock(&vs->dev.mutex); mutex_unlock(&tcm_vhost_mutex); @@ -1336,70 +1378,85 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) return 0; } +static void vhost_scsi_free(struct vhost_scsi *vs) +{ + if (is_vmalloc_addr(vs)) + vfree(vs); + else + kfree(vs); +} + static int vhost_scsi_open(struct inode *inode, struct file *f) { - struct vhost_scsi *s; + struct vhost_scsi *vs; struct vhost_virtqueue **vqs; - int r, i; + int r = -ENOMEM, i; - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; + vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!vs) { + vs = vzalloc(sizeof(*vs)); + if (!vs) + goto err_vs; + } vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL); - if (!vqs) { - kfree(s); - return -ENOMEM; - } + if (!vqs) + goto err_vqs; - vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work); - vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work); + vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work); + vhost_work_init(&vs->vs_event_work, tcm_vhost_evt_work); - s->vs_events_nr = 0; - s->vs_events_missed = false; + vs->vs_events_nr = 0; + vs->vs_events_missed = false; - vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq; - vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq; - s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; - s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; + vqs[VHOST_SCSI_VQ_CTL] = &vs->vqs[VHOST_SCSI_VQ_CTL].vq; + vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; + vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; + vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { - vqs[i] = &s->vqs[i].vq; - s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; + vqs[i] = &vs->vqs[i].vq; + vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ); + r = vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ); - tcm_vhost_init_inflight(s, NULL); + tcm_vhost_init_inflight(vs, NULL); - if (r < 0) { - kfree(vqs); - kfree(s); - return r; - } + if (r < 0) + goto err_init; - f->private_data = s; + f->private_data = vs; return 0; + +err_init: + kfree(vqs); +err_vqs: + vhost_scsi_free(vs); +err_vs: + return r; } static int vhost_scsi_release(struct inode *inode, struct file *f) { - struct vhost_scsi *s = f->private_data; + struct vhost_scsi *vs = f->private_data; struct vhost_scsi_target t; - mutex_lock(&s->dev.mutex); - memcpy(t.vhost_wwpn, s->vs_vhost_wwpn, sizeof(t.vhost_wwpn)); - mutex_unlock(&s->dev.mutex); - vhost_scsi_clear_endpoint(s, &t); - vhost_dev_stop(&s->dev); - vhost_dev_cleanup(&s->dev, false); + mutex_lock(&vs->dev.mutex); + memcpy(t.vhost_wwpn, vs->vs_vhost_wwpn, sizeof(t.vhost_wwpn)); + mutex_unlock(&vs->dev.mutex); + vhost_scsi_clear_endpoint(vs, &t); + vhost_dev_stop(&vs->dev); + vhost_dev_cleanup(&vs->dev, false); /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */ - vhost_scsi_flush(s); - kfree(s->dev.vqs); - kfree(s); + vhost_scsi_flush(vs); + kfree(vs->dev.vqs); + vhost_scsi_free(vs); return 0; } -static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, - unsigned long arg) +static long +vhost_scsi_ioctl(struct file *f, + unsigned int ioctl, + unsigned long arg) { struct vhost_scsi *vs = f->private_data; struct vhost_scsi_target backend; @@ -1515,8 +1572,9 @@ static char *tcm_vhost_dump_proto_id(struct tcm_vhost_tport *tport) return "Unknown"; } -static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg, - struct se_lun *lun, bool plug) +static void +tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg, + struct se_lun *lun, bool plug) { struct vhost_scsi *vs = tpg->vhost_scsi; @@ -1556,18 +1614,18 @@ static void tcm_vhost_hotunplug(struct tcm_vhost_tpg *tpg, struct se_lun *lun) } static int tcm_vhost_port_link(struct se_portal_group *se_tpg, - struct se_lun *lun) + struct se_lun *lun) { - struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg, + struct tcm_vhost_tpg *tpg = container_of(se_tpg, struct tcm_vhost_tpg, se_tpg); mutex_lock(&tcm_vhost_mutex); - mutex_lock(&tv_tpg->tv_tpg_mutex); - tv_tpg->tv_tpg_port_count++; - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_port_count++; + mutex_unlock(&tpg->tv_tpg_mutex); - tcm_vhost_hotplug(tv_tpg, lun); + tcm_vhost_hotplug(tpg, lun); mutex_unlock(&tcm_vhost_mutex); @@ -1575,26 +1633,26 @@ static int tcm_vhost_port_link(struct se_portal_group *se_tpg, } static void tcm_vhost_port_unlink(struct se_portal_group *se_tpg, - struct se_lun *lun) + struct se_lun *lun) { - struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg, + struct tcm_vhost_tpg *tpg = container_of(se_tpg, struct tcm_vhost_tpg, se_tpg); mutex_lock(&tcm_vhost_mutex); - mutex_lock(&tv_tpg->tv_tpg_mutex); - tv_tpg->tv_tpg_port_count--; - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_port_count--; + mutex_unlock(&tpg->tv_tpg_mutex); - tcm_vhost_hotunplug(tv_tpg, lun); + tcm_vhost_hotunplug(tpg, lun); mutex_unlock(&tcm_vhost_mutex); } -static struct se_node_acl *tcm_vhost_make_nodeacl( - struct se_portal_group *se_tpg, - struct config_group *group, - const char *name) +static struct se_node_acl * +tcm_vhost_make_nodeacl(struct se_portal_group *se_tpg, + struct config_group *group, + const char *name) { struct se_node_acl *se_nacl, *se_nacl_new; struct tcm_vhost_nacl *nacl; @@ -1635,35 +1693,78 @@ static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl) kfree(nacl); } -static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tv_tpg, - const char *name) +static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus, + struct se_session *se_sess) +{ + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; + + if (!se_sess->sess_cmd_map) + return; + + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + kfree(tv_cmd->tvc_sgl); + kfree(tv_cmd->tvc_upages); + } +} + +static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, + const char *name) { struct se_portal_group *se_tpg; + struct se_session *se_sess; struct tcm_vhost_nexus *tv_nexus; + struct tcm_vhost_cmd *tv_cmd; + unsigned int i; - mutex_lock(&tv_tpg->tv_tpg_mutex); - if (tv_tpg->tpg_nexus) { - mutex_unlock(&tv_tpg->tv_tpg_mutex); - pr_debug("tv_tpg->tpg_nexus already exists\n"); + mutex_lock(&tpg->tv_tpg_mutex); + if (tpg->tpg_nexus) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_debug("tpg->tpg_nexus already exists\n"); return -EEXIST; } - se_tpg = &tv_tpg->se_tpg; + se_tpg = &tpg->se_tpg; tv_nexus = kzalloc(sizeof(struct tcm_vhost_nexus), GFP_KERNEL); if (!tv_nexus) { - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); pr_err("Unable to allocate struct tcm_vhost_nexus\n"); return -ENOMEM; } /* - * Initialize the struct se_session pointer + * Initialize the struct se_session pointer and setup tagpool + * for struct tcm_vhost_cmd descriptors */ - tv_nexus->tvn_se_sess = transport_init_session(); + tv_nexus->tvn_se_sess = transport_init_session_tags( + TCM_VHOST_DEFAULT_TAGS, + sizeof(struct tcm_vhost_cmd)); if (IS_ERR(tv_nexus->tvn_se_sess)) { - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); kfree(tv_nexus); return -ENOMEM; } + se_sess = tv_nexus->tvn_se_sess; + for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) { + tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; + + tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) * + TCM_VHOST_PREALLOC_SGLS, GFP_KERNEL); + if (!tv_cmd->tvc_sgl) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_sgl\n"); + goto out; + } + + tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) * + TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL); + if (!tv_cmd->tvc_upages) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_upages\n"); + goto out; + } + } /* * Since we are running in 'demo mode' this call with generate a * struct se_node_acl for the tcm_vhost struct se_portal_group with @@ -1672,12 +1773,10 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tv_tpg, tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl( se_tpg, (unsigned char *)name); if (!tv_nexus->tvn_se_sess->se_node_acl) { - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); pr_debug("core_tpg_check_initiator_node_acl() failed" " for %s\n", name); - transport_free_session(tv_nexus->tvn_se_sess); - kfree(tv_nexus); - return -ENOMEM; + goto out; } /* * Now register the TCM vhost virtual I_T Nexus as active with the @@ -1685,10 +1784,16 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tv_tpg, */ __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl, tv_nexus->tvn_se_sess, tv_nexus); - tv_tpg->tpg_nexus = tv_nexus; + tpg->tpg_nexus = tv_nexus; - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); return 0; + +out: + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); + transport_free_session(se_sess); + kfree(tv_nexus); + return -ENOMEM; } static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) @@ -1728,6 +1833,8 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated" " %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport), tv_nexus->tvn_se_sess->se_node_acl->initiatorname); + + tcm_vhost_free_cmd_map_res(tv_nexus, se_sess); /* * Release the SCSI I_T Nexus to the emulated vhost Target Port */ @@ -1740,40 +1847,40 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg) } static ssize_t tcm_vhost_tpg_show_nexus(struct se_portal_group *se_tpg, - char *page) + char *page) { - struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg, + struct tcm_vhost_tpg *tpg = container_of(se_tpg, struct tcm_vhost_tpg, se_tpg); struct tcm_vhost_nexus *tv_nexus; ssize_t ret; - mutex_lock(&tv_tpg->tv_tpg_mutex); - tv_nexus = tv_tpg->tpg_nexus; + mutex_lock(&tpg->tv_tpg_mutex); + tv_nexus = tpg->tpg_nexus; if (!tv_nexus) { - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); return -ENODEV; } ret = snprintf(page, PAGE_SIZE, "%s\n", tv_nexus->tvn_se_sess->se_node_acl->initiatorname); - mutex_unlock(&tv_tpg->tv_tpg_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); return ret; } static ssize_t tcm_vhost_tpg_store_nexus(struct se_portal_group *se_tpg, - const char *page, - size_t count) + const char *page, + size_t count) { - struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg, + struct tcm_vhost_tpg *tpg = container_of(se_tpg, struct tcm_vhost_tpg, se_tpg); - struct tcm_vhost_tport *tport_wwn = tv_tpg->tport; + struct tcm_vhost_tport *tport_wwn = tpg->tport; unsigned char i_port[TCM_VHOST_NAMELEN], *ptr, *port_ptr; int ret; /* * Shutdown the active I_T nexus if 'NULL' is passed.. */ if (!strncmp(page, "NULL", 4)) { - ret = tcm_vhost_drop_nexus(tv_tpg); + ret = tcm_vhost_drop_nexus(tpg); return (!ret) ? count : ret; } /* @@ -1831,7 +1938,7 @@ check_newline: if (i_port[strlen(i_port)-1] == '\n') i_port[strlen(i_port)-1] = '\0'; - ret = tcm_vhost_make_nexus(tv_tpg, port_ptr); + ret = tcm_vhost_make_nexus(tpg, port_ptr); if (ret < 0) return ret; @@ -1845,9 +1952,10 @@ static struct configfs_attribute *tcm_vhost_tpg_attrs[] = { NULL, }; -static struct se_portal_group *tcm_vhost_make_tpg(struct se_wwn *wwn, - struct config_group *group, - const char *name) +static struct se_portal_group * +tcm_vhost_make_tpg(struct se_wwn *wwn, + struct config_group *group, + const char *name) { struct tcm_vhost_tport *tport = container_of(wwn, struct tcm_vhost_tport, tport_wwn); @@ -1903,9 +2011,10 @@ static void tcm_vhost_drop_tpg(struct se_portal_group *se_tpg) kfree(tpg); } -static struct se_wwn *tcm_vhost_make_tport(struct target_fabric_configfs *tf, - struct config_group *group, - const char *name) +static struct se_wwn * +tcm_vhost_make_tport(struct target_fabric_configfs *tf, + struct config_group *group, + const char *name) { struct tcm_vhost_tport *tport; char *ptr; @@ -1975,9 +2084,9 @@ static void tcm_vhost_drop_tport(struct se_wwn *wwn) kfree(tport); } -static ssize_t tcm_vhost_wwn_show_attr_version( - struct target_fabric_configfs *tf, - char *page) +static ssize_t +tcm_vhost_wwn_show_attr_version(struct target_fabric_configfs *tf, + char *page) { return sprintf(page, "TCM_VHOST fabric module %s on %s/%s" "on "UTS_RELEASE"\n", TCM_VHOST_VERSION, utsname()->sysname, @@ -2008,6 +2117,7 @@ static struct target_core_fabric_ops tcm_vhost_ops = { .tpg_release_fabric_acl = tcm_vhost_release_fabric_acl, .tpg_get_inst_index = tcm_vhost_tpg_get_inst_index, .release_cmd = tcm_vhost_release_cmd, + .check_stop_free = vhost_scsi_check_stop_free, .shutdown_session = tcm_vhost_shutdown_session, .close_session = tcm_vhost_close_session, .sess_get_index = tcm_vhost_sess_get_index, diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 1ee45bc85f6..339eae85859 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -13,12 +13,11 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/workqueue.h> -#include <linux/rcupdate.h> #include <linux/file.h> #include <linux/slab.h> #include "test.h" -#include "vhost.c" +#include "vhost.h" /* Max number of bytes transferred before requeueing the job. * Using this limit prevents one virtqueue from starving others. */ @@ -38,17 +37,19 @@ struct vhost_test { * read-size critical section for our kind of RCU. */ static void handle_vq(struct vhost_test *n) { - struct vhost_virtqueue *vq = &n->dev.vqs[VHOST_TEST_VQ]; + struct vhost_virtqueue *vq = &n->vqs[VHOST_TEST_VQ]; unsigned out, in; int head; size_t len, total_len = 0; void *private; - private = rcu_dereference_check(vq->private_data, 1); - if (!private) + mutex_lock(&vq->mutex); + private = vq->private_data; + if (!private) { + mutex_unlock(&vq->mutex); return; + } - mutex_lock(&vq->mutex); vhost_disable_notify(&n->dev, vq); for (;;) { @@ -102,15 +103,23 @@ static int vhost_test_open(struct inode *inode, struct file *f) { struct vhost_test *n = kmalloc(sizeof *n, GFP_KERNEL); struct vhost_dev *dev; + struct vhost_virtqueue **vqs; int r; if (!n) return -ENOMEM; + vqs = kmalloc(VHOST_TEST_VQ_MAX * sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + kfree(n); + return -ENOMEM; + } dev = &n->dev; + vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - r = vhost_dev_init(dev, n->vqs, VHOST_TEST_VQ_MAX); + r = vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX); if (r < 0) { + kfree(vqs); kfree(n); return r; } @@ -126,9 +135,8 @@ static void *vhost_test_stop_vq(struct vhost_test *n, void *private; mutex_lock(&vq->mutex); - private = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); - rcu_assign_pointer(vq->private_data, NULL); + private = vq->private_data; + vq->private_data = NULL; mutex_unlock(&vq->mutex); return private; } @@ -140,7 +148,7 @@ static void vhost_test_stop(struct vhost_test *n, void **privatep) static void vhost_test_flush_vq(struct vhost_test *n, int index) { - vhost_poll_flush(&n->dev.vqs[index].poll); + vhost_poll_flush(&n->vqs[index].poll); } static void vhost_test_flush(struct vhost_test *n) @@ -191,9 +199,8 @@ static long vhost_test_run(struct vhost_test *n, int test) priv = test ? n : NULL; /* start polling new socket */ - oldpriv = rcu_dereference_protected(vq->private_data, - lockdep_is_held(&vq->mutex)); - rcu_assign_pointer(vq->private_data, priv); + oldpriv = vq->private_data; + vq->private_data = priv; r = vhost_init_used(&n->vqs[index]); @@ -268,14 +275,14 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return vhost_test_run(n, test); case VHOST_GET_FEATURES: - features = VHOST_NET_FEATURES; + features = VHOST_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; case VHOST_SET_FEATURES: if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; - if (features & ~VHOST_NET_FEATURES) + if (features & ~VHOST_FEATURES) return -EOPNOTSUPP; return vhost_test_set_features(n, features); case VHOST_RESET_OWNER: diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index beee7f5787e..69068e0d8f3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -13,7 +13,7 @@ #include <linux/eventfd.h> #include <linux/vhost.h> -#include <linux/socket.h> /* memcpy_fromiovec */ +#include <linux/uio.h> #include <linux/mm.h> #include <linux/mmu_context.h> #include <linux/miscdevice.h> @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/kthread.h> #include <linux/cgroup.h> +#include <linux/module.h> #include "vhost.h" @@ -66,6 +67,7 @@ void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) work->flushing = 0; work->queue_seq = work->done_seq = 0; } +EXPORT_SYMBOL_GPL(vhost_work_init); /* Init poll structure */ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, @@ -79,6 +81,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, vhost_work_init(&poll->work, fn); } +EXPORT_SYMBOL_GPL(vhost_poll_init); /* Start polling a file. We add ourselves to file's wait queue. The caller must * keep a reference to a file until after vhost_poll_stop is called. */ @@ -101,6 +104,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) return ret; } +EXPORT_SYMBOL_GPL(vhost_poll_start); /* Stop polling a file. After this function returns, it becomes safe to drop the * file reference. You must also flush afterwards. */ @@ -111,6 +115,7 @@ void vhost_poll_stop(struct vhost_poll *poll) poll->wqh = NULL; } } +EXPORT_SYMBOL_GPL(vhost_poll_stop); static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, unsigned seq) @@ -123,7 +128,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, return left <= 0; } -static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) +void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) { unsigned seq; int flushing; @@ -138,6 +143,7 @@ static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) spin_unlock_irq(&dev->work_lock); BUG_ON(flushing < 0); } +EXPORT_SYMBOL_GPL(vhost_work_flush); /* Flush any work that has been scheduled. When calling this, don't hold any * locks that are also used by the callback. */ @@ -145,6 +151,7 @@ void vhost_poll_flush(struct vhost_poll *poll) { vhost_work_flush(poll->dev, &poll->work); } +EXPORT_SYMBOL_GPL(vhost_poll_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { @@ -154,15 +161,19 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) if (list_empty(&work->node)) { list_add_tail(&work->node, &dev->work_list); work->queue_seq++; + spin_unlock_irqrestore(&dev->work_lock, flags); wake_up_process(dev->worker); + } else { + spin_unlock_irqrestore(&dev->work_lock, flags); } - spin_unlock_irqrestore(&dev->work_lock, flags); } +EXPORT_SYMBOL_GPL(vhost_work_queue); void vhost_poll_queue(struct vhost_poll *poll) { vhost_work_queue(poll->dev, &poll->work); } +EXPORT_SYMBOL_GPL(vhost_poll_queue); static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) @@ -251,17 +262,16 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) /* Helper to allocate iovec buffers for all vqs. */ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) { + struct vhost_virtqueue *vq; int i; for (i = 0; i < dev->nvqs; ++i) { - dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect * - UIO_MAXIOV, GFP_KERNEL); - dev->vqs[i]->log = kmalloc(sizeof *dev->vqs[i]->log * UIO_MAXIOV, - GFP_KERNEL); - dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads * - UIO_MAXIOV, GFP_KERNEL); - if (!dev->vqs[i]->indirect || !dev->vqs[i]->log || - !dev->vqs[i]->heads) + vq = dev->vqs[i]; + vq->indirect = kmalloc(sizeof *vq->indirect * UIO_MAXIOV, + GFP_KERNEL); + vq->log = kmalloc(sizeof *vq->log * UIO_MAXIOV, GFP_KERNEL); + vq->heads = kmalloc(sizeof *vq->heads * UIO_MAXIOV, GFP_KERNEL); + if (!vq->indirect || !vq->log || !vq->heads) goto err_nomem; } return 0; @@ -283,6 +293,7 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) long vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs) { + struct vhost_virtqueue *vq; int i; dev->vqs = vqs; @@ -297,19 +308,21 @@ long vhost_dev_init(struct vhost_dev *dev, dev->worker = NULL; for (i = 0; i < dev->nvqs; ++i) { - dev->vqs[i]->log = NULL; - dev->vqs[i]->indirect = NULL; - dev->vqs[i]->heads = NULL; - dev->vqs[i]->dev = dev; - mutex_init(&dev->vqs[i]->mutex); - vhost_vq_reset(dev, dev->vqs[i]); - if (dev->vqs[i]->handle_kick) - vhost_poll_init(&dev->vqs[i]->poll, - dev->vqs[i]->handle_kick, POLLIN, dev); + vq = dev->vqs[i]; + vq->log = NULL; + vq->indirect = NULL; + vq->heads = NULL; + vq->dev = dev; + mutex_init(&vq->mutex); + vhost_vq_reset(dev, vq); + if (vq->handle_kick) + vhost_poll_init(&vq->poll, vq->handle_kick, + POLLIN, dev); } return 0; } +EXPORT_SYMBOL_GPL(vhost_dev_init); /* Caller should have device mutex */ long vhost_dev_check_owner(struct vhost_dev *dev) @@ -317,6 +330,7 @@ long vhost_dev_check_owner(struct vhost_dev *dev) /* Are you the owner? If not, I don't think you mean to do that */ return dev->mm == current->mm ? 0 : -EPERM; } +EXPORT_SYMBOL_GPL(vhost_dev_check_owner); struct vhost_attach_cgroups_struct { struct vhost_work work; @@ -344,13 +358,20 @@ static int vhost_attach_cgroups(struct vhost_dev *dev) } /* Caller should have device mutex */ +bool vhost_dev_has_owner(struct vhost_dev *dev) +{ + return dev->mm; +} +EXPORT_SYMBOL_GPL(vhost_dev_has_owner); + +/* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { struct task_struct *worker; int err; /* Is there an owner already? */ - if (dev->mm) { + if (vhost_dev_has_owner(dev)) { err = -EBUSY; goto err_mm; } @@ -385,11 +406,13 @@ err_worker: err_mm: return err; } +EXPORT_SYMBOL_GPL(vhost_dev_set_owner); struct vhost_memory *vhost_dev_reset_owner_prepare(void) { return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); } +EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); /* Caller should have device mutex */ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) @@ -400,6 +423,7 @@ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) memory->nregions = 0; RCU_INIT_POINTER(dev->memory, memory); } +EXPORT_SYMBOL_GPL(vhost_dev_reset_owner); void vhost_dev_stop(struct vhost_dev *dev) { @@ -412,6 +436,7 @@ void vhost_dev_stop(struct vhost_dev *dev) } } } +EXPORT_SYMBOL_GPL(vhost_dev_stop); /* Caller should have device mutex if and only if locked is set */ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) @@ -452,6 +477,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) mmput(dev->mm); dev->mm = NULL; } +EXPORT_SYMBOL_GPL(vhost_dev_cleanup); static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) { @@ -537,6 +563,7 @@ int vhost_log_access_ok(struct vhost_dev *dev) lockdep_is_held(&dev->mutex)); return memory_access_ok(dev, mp, 1); } +EXPORT_SYMBOL_GPL(vhost_log_access_ok); /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ @@ -562,6 +589,7 @@ int vhost_vq_access_ok(struct vhost_virtqueue *vq) return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) && vq_log_access_ok(vq->dev, vq, vq->log_base); } +EXPORT_SYMBOL_GPL(vhost_vq_access_ok); static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { @@ -791,6 +819,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) vhost_poll_flush(&vq->poll); return r; } +EXPORT_SYMBOL_GPL(vhost_vring_ioctl); /* Caller must have device mutex */ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) @@ -871,6 +900,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) done: return r; } +EXPORT_SYMBOL_GPL(vhost_dev_ioctl); static const struct vhost_memory_region *find_region(struct vhost_memory *mem, __u64 addr, __u32 len) @@ -962,6 +992,7 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, BUG(); return 0; } +EXPORT_SYMBOL_GPL(vhost_log_write); static int vhost_update_used_flags(struct vhost_virtqueue *vq) { @@ -1013,6 +1044,7 @@ int vhost_init_used(struct vhost_virtqueue *vq) vq->signalled_used_valid = false; return get_user(vq->last_used_idx, &vq->used->idx); } +EXPORT_SYMBOL_GPL(vhost_init_used); static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, struct iovec iov[], int iov_size) @@ -1289,60 +1321,24 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } +EXPORT_SYMBOL_GPL(vhost_get_vq_desc); /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) { vq->last_avail_idx -= n; } +EXPORT_SYMBOL_GPL(vhost_discard_vq_desc); /* After we've used one of their buffers, we tell them about it. We'll then * want to notify the guest, using eventfd. */ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) { - struct vring_used_elem __user *used; + struct vring_used_elem heads = { head, len }; - /* The virtqueue contains a ring of used buffers. Get a pointer to the - * next entry in that used ring. */ - used = &vq->used->ring[vq->last_used_idx % vq->num]; - if (__put_user(head, &used->id)) { - vq_err(vq, "Failed to write used id"); - return -EFAULT; - } - if (__put_user(len, &used->len)) { - vq_err(vq, "Failed to write used len"); - return -EFAULT; - } - /* Make sure buffer is written before we update index. */ - smp_wmb(); - if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) { - vq_err(vq, "Failed to increment used idx"); - return -EFAULT; - } - if (unlikely(vq->log_used)) { - /* Make sure data is seen before log. */ - smp_wmb(); - /* Log used ring entry write. */ - log_write(vq->log_base, - vq->log_addr + - ((void __user *)used - (void __user *)vq->used), - sizeof *used); - /* Log used index update. */ - log_write(vq->log_base, - vq->log_addr + offsetof(struct vring_used, idx), - sizeof vq->used->idx); - if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); - } - vq->last_used_idx++; - /* If the driver never bothers to signal in a very long while, - * used index might wrap around. If that happens, invalidate - * signalled_used index we stored. TODO: make sure driver - * signals at least once in 2^16 and remove this. */ - if (unlikely(vq->last_used_idx == vq->signalled_used)) - vq->signalled_used_valid = false; - return 0; + return vhost_add_used_n(vq, &heads, 1); } +EXPORT_SYMBOL_GPL(vhost_add_used); static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, @@ -1354,7 +1350,16 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx % vq->num; used = vq->used->ring + start; - if (__copy_to_user(used, heads, count * sizeof *used)) { + if (count == 1) { + if (__put_user(heads[0].id, &used->id)) { + vq_err(vq, "Failed to write used id"); + return -EFAULT; + } + if (__put_user(heads[0].len, &used->len)) { + vq_err(vq, "Failed to write used len"); + return -EFAULT; + } + } else if (__copy_to_user(used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } @@ -1412,6 +1417,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, } return r; } +EXPORT_SYMBOL_GPL(vhost_add_used_n); static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -1456,6 +1462,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (vq->call_ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx, 1); } +EXPORT_SYMBOL_GPL(vhost_signal); /* And here's the combo meal deal. Supersize me! */ void vhost_add_used_and_signal(struct vhost_dev *dev, @@ -1465,6 +1472,7 @@ void vhost_add_used_and_signal(struct vhost_dev *dev, vhost_add_used(vq, head, len); vhost_signal(dev, vq); } +EXPORT_SYMBOL_GPL(vhost_add_used_and_signal); /* multi-buffer version of vhost_add_used_and_signal */ void vhost_add_used_and_signal_n(struct vhost_dev *dev, @@ -1474,6 +1482,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, vhost_add_used_n(vq, heads, count); vhost_signal(dev, vq); } +EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) @@ -1511,6 +1520,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) return avail_idx != vq->avail_idx; } +EXPORT_SYMBOL_GPL(vhost_enable_notify); /* We don't need to be notified again. */ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) @@ -1527,3 +1537,21 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) &vq->used->flags, r); } } +EXPORT_SYMBOL_GPL(vhost_disable_notify); + +static int __init vhost_init(void) +{ + return 0; +} + +static void __exit vhost_exit(void) +{ +} + +module_init(vhost_init); +module_exit(vhost_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael S. Tsirkin"); +MODULE_DESCRIPTION("Host kernel accelerator for virtio"); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index a7ad6359298..4465ed5f316 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -46,6 +46,8 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file); void vhost_poll_stop(struct vhost_poll *poll); void vhost_poll_flush(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll); +void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work); +long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp); struct vhost_log { u64 addr; @@ -101,14 +103,8 @@ struct vhost_virtqueue { struct iovec iov[UIO_MAXIOV]; struct iovec *indirect; struct vring_used_elem *heads; - /* We use a kind of RCU to access private pointer. - * All readers access it from worker, which makes it possible to - * flush the vhost_work instead of synchronize_rcu. Therefore readers do - * not need to call rcu_read_lock/rcu_read_unlock: the beginning of - * vhost_work execution acts instead of rcu_read_lock() and the end of - * vhost_work execution acts instead of rcu_read_unlock(). - * Writers use virtqueue mutex. */ - void __rcu *private_data; + /* Protected by virtqueue mutex. */ + void *private_data; /* Log write descriptors */ void __user *log_base; struct vhost_log *log; @@ -133,6 +129,7 @@ struct vhost_dev { long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_set_owner(struct vhost_dev *dev); +bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); struct vhost_memory *vhost_dev_reset_owner_prepare(void); void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); |