summaryrefslogtreecommitdiffstats
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/net.c14
-rw-r--r--drivers/vhost/test.c320
-rw-r--r--drivers/vhost/test.h7
-rw-r--r--drivers/vhost/vhost.c62
-rw-r--r--drivers/vhost/vhost.h2
5 files changed, 366 insertions, 39 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 4b4da5b86ff..9b3ca103135 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -10,7 +10,6 @@
#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
-#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -129,8 +128,9 @@ static void handle_tx(struct vhost_net *net)
size_t hdr_size;
struct socket *sock;
- sock = rcu_dereference_check(vq->private_data,
- lockdep_is_held(&vq->mutex));
+ /* TODO: check that we are running from vhost_worker?
+ * Not sure it's worth it, it's straight-forward enough. */
+ sock = rcu_dereference_check(vq->private_data, 1);
if (!sock)
return;
@@ -142,7 +142,6 @@ static void handle_tx(struct vhost_net *net)
return;
}
- use_mm(net->dev.mm);
mutex_lock(&vq->mutex);
vhost_disable_notify(vq);
@@ -207,7 +206,6 @@ static void handle_tx(struct vhost_net *net)
}
mutex_unlock(&vq->mutex);
- unuse_mm(net->dev.mm);
}
static int peek_head_len(struct sock *sk)
@@ -312,7 +310,6 @@ static void handle_rx_big(struct vhost_net *net)
if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
return;
- use_mm(net->dev.mm);
mutex_lock(&vq->mutex);
vhost_disable_notify(vq);
hdr_size = vq->vhost_hlen;
@@ -391,7 +388,6 @@ static void handle_rx_big(struct vhost_net *net)
}
mutex_unlock(&vq->mutex);
- unuse_mm(net->dev.mm);
}
/* Expects to be always run from workqueue - which acts as
@@ -423,7 +419,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
return;
- use_mm(net->dev.mm);
mutex_lock(&vq->mutex);
vhost_disable_notify(vq);
vhost_hlen = vq->vhost_hlen;
@@ -458,7 +453,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
else
/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
- * needed because sendmsg can modify msg_iov. */
+ * needed because recvmsg can modify msg_iov. */
copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in);
msg.msg_iovlen = in;
err = sock->ops->recvmsg(NULL, sock, &msg,
@@ -500,7 +495,6 @@ static void handle_rx_mergeable(struct vhost_net *net)
}
mutex_unlock(&vq->mutex);
- unuse_mm(net->dev.mm);
}
static void handle_rx(struct vhost_net *net)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
new file mode 100644
index 00000000000..099f30230d0
--- /dev/null
+++ b/drivers/vhost/test.c
@@ -0,0 +1,320 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * test virtio server in host kernel.
+ */
+
+#include <linux/compat.h>
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+#include "test.h"
+#include "vhost.c"
+
+/* Max number of bytes transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others. */
+#define VHOST_TEST_WEIGHT 0x80000
+
+enum {
+ VHOST_TEST_VQ = 0,
+ VHOST_TEST_VQ_MAX = 1,
+};
+
+struct vhost_test {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[VHOST_TEST_VQ_MAX];
+};
+
+/* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+static void handle_vq(struct vhost_test *n)
+{
+ struct vhost_virtqueue *vq = &n->dev.vqs[VHOST_TEST_VQ];
+ unsigned out, in;
+ int head;
+ size_t len, total_len = 0;
+ void *private;
+
+ private = rcu_dereference_check(vq->private_data, 1);
+ if (!private)
+ return;
+
+ mutex_lock(&vq->mutex);
+ vhost_disable_notify(vq);
+
+ for (;;) {
+ head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
+ /* On error, stop handling until the next kick. */
+ if (unlikely(head < 0))
+ break;
+ /* Nothing new? Wait for eventfd to tell us they refilled. */
+ if (head == vq->num) {
+ if (unlikely(vhost_enable_notify(vq))) {
+ vhost_disable_notify(vq);
+ continue;
+ }
+ break;
+ }
+ if (in) {
+ vq_err(vq, "Unexpected descriptor format for TX: "
+ "out %d, int %d\n", out, in);
+ break;
+ }
+ len = iov_length(vq->iov, out);
+ /* Sanity check */
+ if (!len) {
+ vq_err(vq, "Unexpected 0 len for TX\n");
+ break;
+ }
+ vhost_add_used_and_signal(&n->dev, vq, head, 0);
+ total_len += len;
+ if (unlikely(total_len >= VHOST_TEST_WEIGHT)) {
+ vhost_poll_queue(&vq->poll);
+ break;
+ }
+ }
+
+ mutex_unlock(&vq->mutex);
+}
+
+static void handle_vq_kick(struct vhost_work *work)
+{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_test *n = container_of(vq->dev, struct vhost_test, dev);
+
+ handle_vq(n);
+}
+
+static int vhost_test_open(struct inode *inode, struct file *f)
+{
+ struct vhost_test *n = kmalloc(sizeof *n, GFP_KERNEL);
+ struct vhost_dev *dev;
+ int r;
+
+ if (!n)
+ return -ENOMEM;
+
+ dev = &n->dev;
+ n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
+ r = vhost_dev_init(dev, n->vqs, VHOST_TEST_VQ_MAX);
+ if (r < 0) {
+ kfree(n);
+ return r;
+ }
+
+ f->private_data = n;
+
+ return 0;
+}
+
+static void *vhost_test_stop_vq(struct vhost_test *n,
+ struct vhost_virtqueue *vq)
+{
+ void *private;
+
+ mutex_lock(&vq->mutex);
+ private = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
+ rcu_assign_pointer(vq->private_data, NULL);
+ mutex_unlock(&vq->mutex);
+ return private;
+}
+
+static void vhost_test_stop(struct vhost_test *n, void **privatep)
+{
+ *privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ);
+}
+
+static void vhost_test_flush_vq(struct vhost_test *n, int index)
+{
+ vhost_poll_flush(&n->dev.vqs[index].poll);
+}
+
+static void vhost_test_flush(struct vhost_test *n)
+{
+ vhost_test_flush_vq(n, VHOST_TEST_VQ);
+}
+
+static int vhost_test_release(struct inode *inode, struct file *f)
+{
+ struct vhost_test *n = f->private_data;
+ void *private;
+
+ vhost_test_stop(n, &private);
+ vhost_test_flush(n);
+ vhost_dev_cleanup(&n->dev);
+ /* We do an extra flush before freeing memory,
+ * since jobs can re-queue themselves. */
+ vhost_test_flush(n);
+ kfree(n);
+ return 0;
+}
+
+static long vhost_test_run(struct vhost_test *n, int test)
+{
+ void *priv, *oldpriv;
+ struct vhost_virtqueue *vq;
+ int r, index;
+
+ if (test < 0 || test > 1)
+ return -EINVAL;
+
+ mutex_lock(&n->dev.mutex);
+ r = vhost_dev_check_owner(&n->dev);
+ if (r)
+ goto err;
+
+ for (index = 0; index < n->dev.nvqs; ++index) {
+ /* Verify that ring has been setup correctly. */
+ if (!vhost_vq_access_ok(&n->vqs[index])) {
+ r = -EFAULT;
+ goto err;
+ }
+ }
+
+ for (index = 0; index < n->dev.nvqs; ++index) {
+ vq = n->vqs + index;
+ mutex_lock(&vq->mutex);
+ priv = test ? n : NULL;
+
+ /* start polling new socket */
+ oldpriv = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
+ rcu_assign_pointer(vq->private_data, priv);
+
+ mutex_unlock(&vq->mutex);
+
+ if (oldpriv) {
+ vhost_test_flush_vq(n, index);
+ }
+ }
+
+ mutex_unlock(&n->dev.mutex);
+ return 0;
+
+err:
+ mutex_unlock(&n->dev.mutex);
+ return r;
+}
+
+static long vhost_test_reset_owner(struct vhost_test *n)
+{
+ void *priv = NULL;
+ long err;
+ mutex_lock(&n->dev.mutex);
+ err = vhost_dev_check_owner(&n->dev);
+ if (err)
+ goto done;
+ vhost_test_stop(n, &priv);
+ vhost_test_flush(n);
+ err = vhost_dev_reset_owner(&n->dev);
+done:
+ mutex_unlock(&n->dev.mutex);
+ return err;
+}
+
+static int vhost_test_set_features(struct vhost_test *n, u64 features)
+{
+ mutex_lock(&n->dev.mutex);
+ if ((features & (1 << VHOST_F_LOG_ALL)) &&
+ !vhost_log_access_ok(&n->dev)) {
+ mutex_unlock(&n->dev.mutex);
+ return -EFAULT;
+ }
+ n->dev.acked_features = features;
+ smp_wmb();
+ vhost_test_flush(n);
+ mutex_unlock(&n->dev.mutex);
+ return 0;
+}
+
+static long vhost_test_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_test *n = f->private_data;
+ void __user *argp = (void __user *)arg;
+ u64 __user *featurep = argp;
+ int test;
+ u64 features;
+ int r;
+ switch (ioctl) {
+ case VHOST_TEST_RUN:
+ if (copy_from_user(&test, argp, sizeof test))
+ return -EFAULT;
+ return vhost_test_run(n, test);
+ case VHOST_GET_FEATURES:
+ features = VHOST_FEATURES;
+ if (copy_to_user(featurep, &features, sizeof features))
+ return -EFAULT;
+ return 0;
+ case VHOST_SET_FEATURES:
+ if (copy_from_user(&features, featurep, sizeof features))
+ return -EFAULT;
+ if (features & ~VHOST_FEATURES)
+ return -EOPNOTSUPP;
+ return vhost_test_set_features(n, features);
+ case VHOST_RESET_OWNER:
+ return vhost_test_reset_owner(n);
+ default:
+ mutex_lock(&n->dev.mutex);
+ r = vhost_dev_ioctl(&n->dev, ioctl, arg);
+ vhost_test_flush(n);
+ mutex_unlock(&n->dev.mutex);
+ return r;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+static long vhost_test_compat_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ return vhost_test_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations vhost_test_fops = {
+ .owner = THIS_MODULE,
+ .release = vhost_test_release,
+ .unlocked_ioctl = vhost_test_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = vhost_test_compat_ioctl,
+#endif
+ .open = vhost_test_open,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice vhost_test_misc = {
+ MISC_DYNAMIC_MINOR,
+ "vhost-test",
+ &vhost_test_fops,
+};
+
+static int vhost_test_init(void)
+{
+ return misc_register(&vhost_test_misc);
+}
+module_init(vhost_test_init);
+
+static void vhost_test_exit(void)
+{
+ misc_deregister(&vhost_test_misc);
+}
+module_exit(vhost_test_exit);
+
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Michael S. Tsirkin");
+MODULE_DESCRIPTION("Host kernel side for virtio simulator");
diff --git a/drivers/vhost/test.h b/drivers/vhost/test.h
new file mode 100644
index 00000000000..1fef5df8215
--- /dev/null
+++ b/drivers/vhost/test.h
@@ -0,0 +1,7 @@
+#ifndef LINUX_VHOST_TEST_H
+#define LINUX_VHOST_TEST_H
+
+/* Start a given test on the virtio null device. 0 stops all tests. */
+#define VHOST_TEST_RUN _IOW(VHOST_VIRTIO, 0x31, int)
+
+#endif
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 94701ff3a23..ade0568c07a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -15,6 +15,7 @@
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <linux/mm.h>
+#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
@@ -29,8 +30,6 @@
#include <linux/if_packet.h>
#include <linux/if_arp.h>
-#include <net/sock.h>
-
#include "vhost.h"
enum {
@@ -98,22 +97,26 @@ void vhost_poll_stop(struct vhost_poll *poll)
remove_wait_queue(poll->wqh, &poll->wait);
}
+static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
+ unsigned seq)
+{
+ int left;
+ spin_lock_irq(&dev->work_lock);
+ left = seq - work->done_seq;
+ spin_unlock_irq(&dev->work_lock);
+ return left <= 0;
+}
+
static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
{
unsigned seq;
- int left;
int flushing;
spin_lock_irq(&dev->work_lock);
seq = work->queue_seq;
work->flushing++;
spin_unlock_irq(&dev->work_lock);
- wait_event(work->done, ({
- spin_lock_irq(&dev->work_lock);
- left = seq - work->done_seq <= 0;
- spin_unlock_irq(&dev->work_lock);
- left;
- }));
+ wait_event(work->done, vhost_work_seq_done(dev, work, seq));
spin_lock_irq(&dev->work_lock);
flushing = --work->flushing;
spin_unlock_irq(&dev->work_lock);
@@ -157,7 +160,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->avail_idx = 0;
vq->last_used_idx = 0;
vq->used_flags = 0;
- vq->used_flags = 0;
vq->log_used = false;
vq->log_addr = -1ull;
vq->vhost_hlen = 0;
@@ -178,6 +180,8 @@ static int vhost_worker(void *data)
struct vhost_work *work = NULL;
unsigned uninitialized_var(seq);
+ use_mm(dev->mm);
+
for (;;) {
/* mb paired w/ kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
@@ -192,7 +196,7 @@ static int vhost_worker(void *data)
if (kthread_should_stop()) {
spin_unlock_irq(&dev->work_lock);
__set_current_state(TASK_RUNNING);
- return 0;
+ break;
}
if (!list_empty(&dev->work_list)) {
work = list_first_entry(&dev->work_list,
@@ -210,6 +214,8 @@ static int vhost_worker(void *data)
schedule();
}
+ unuse_mm(dev->mm);
+ return 0;
}
/* Helper to allocate iovec buffers for all vqs. */
@@ -402,15 +408,14 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
kfree(rcu_dereference_protected(dev->memory,
lockdep_is_held(&dev->mutex)));
RCU_INIT_POINTER(dev->memory, NULL);
- if (dev->mm)
- mmput(dev->mm);
- dev->mm = NULL;
-
WARN_ON(!list_empty(&dev->work_list));
if (dev->worker) {
kthread_stop(dev->worker);
dev->worker = NULL;
}
+ if (dev->mm)
+ mmput(dev->mm);
+ dev->mm = NULL;
}
static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -881,14 +886,15 @@ static int set_bit_to_user(int nr, void __user *addr)
static int log_write(void __user *log_base,
u64 write_address, u64 write_length)
{
+ u64 write_page = write_address / VHOST_PAGE_SIZE;
int r;
if (!write_length)
return 0;
- write_address /= VHOST_PAGE_SIZE;
+ write_length += write_address % VHOST_PAGE_SIZE;
for (;;) {
u64 base = (u64)(unsigned long)log_base;
- u64 log = base + write_address / 8;
- int bit = write_address % 8;
+ u64 log = base + write_page / 8;
+ int bit = write_page % 8;
if ((u64)(unsigned long)log != log)
return -EFAULT;
r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
@@ -897,7 +903,7 @@ static int log_write(void __user *log_base,
if (write_length <= VHOST_PAGE_SIZE)
break;
write_length -= VHOST_PAGE_SIZE;
- write_address += VHOST_PAGE_SIZE;
+ write_page += 1;
}
return r;
}
@@ -1092,7 +1098,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
/* Check it isn't doing very strange things with descriptor numbers. */
last_avail_idx = vq->last_avail_idx;
- if (unlikely(get_user(vq->avail_idx, &vq->avail->idx))) {
+ if (unlikely(__get_user(vq->avail_idx, &vq->avail->idx))) {
vq_err(vq, "Failed to access avail idx at %p\n",
&vq->avail->idx);
return -EFAULT;
@@ -1113,8 +1119,8 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
- if (unlikely(get_user(head,
- &vq->avail->ring[last_avail_idx % vq->num]))) {
+ if (unlikely(__get_user(head,
+ &vq->avail->ring[last_avail_idx % vq->num]))) {
vq_err(vq, "Failed to read head: idx %d address %p\n",
last_avail_idx,
&vq->avail->ring[last_avail_idx % vq->num]);
@@ -1213,17 +1219,17 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
/* The virtqueue contains a ring of used buffers. Get a pointer to the
* next entry in that used ring. */
used = &vq->used->ring[vq->last_used_idx % vq->num];
- if (put_user(head, &used->id)) {
+ if (__put_user(head, &used->id)) {
vq_err(vq, "Failed to write used id");
return -EFAULT;
}
- if (put_user(len, &used->len)) {
+ if (__put_user(len, &used->len)) {
vq_err(vq, "Failed to write used len");
return -EFAULT;
}
/* Make sure buffer is written before we update index. */
smp_wmb();
- if (put_user(vq->last_used_idx + 1, &vq->used->idx)) {
+ if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) {
vq_err(vq, "Failed to increment used idx");
return -EFAULT;
}
@@ -1255,7 +1261,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
start = vq->last_used_idx % vq->num;
used = vq->used->ring + start;
- if (copy_to_user(used, heads, count * sizeof *used)) {
+ if (__copy_to_user(used, heads, count * sizeof *used)) {
vq_err(vq, "Failed to write used");
return -EFAULT;
}
@@ -1316,7 +1322,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
* interrupts. */
smp_mb();
- if (get_user(flags, &vq->avail->flags)) {
+ if (__get_user(flags, &vq->avail->flags)) {
vq_err(vq, "Failed to get flags");
return;
}
@@ -1367,7 +1373,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
smp_mb();
- r = get_user(avail_idx, &vq->avail->idx);
+ r = __get_user(avail_idx, &vq->avail->idx);
if (r) {
vq_err(vq, "Failed to check avail idx at %p: %d\n",
&vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 073d06ae091..2af44b7b1f3 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -102,7 +102,7 @@ struct vhost_virtqueue {
* flush the vhost_work instead of synchronize_rcu. Therefore readers do
* not need to call rcu_read_lock/rcu_read_unlock: the beginning of
* vhost_work execution acts instead of rcu_read_lock() and the end of
- * vhost_work execution acts instead of rcu_read_lock().
+ * vhost_work execution acts instead of rcu_read_unlock().
* Writers use virtqueue mutex. */
void __rcu *private_data;
/* Log write descriptors */