diff options
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/net.c | 14 | ||||
-rw-r--r-- | drivers/vhost/test.c | 320 | ||||
-rw-r--r-- | drivers/vhost/test.h | 7 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 62 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 2 |
5 files changed, 366 insertions, 39 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4b4da5b86ff..9b3ca103135 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -10,7 +10,6 @@ #include <linux/eventfd.h> #include <linux/vhost.h> #include <linux/virtio_net.h> -#include <linux/mmu_context.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> @@ -129,8 +128,9 @@ static void handle_tx(struct vhost_net *net) size_t hdr_size; struct socket *sock; - sock = rcu_dereference_check(vq->private_data, - lockdep_is_held(&vq->mutex)); + /* TODO: check that we are running from vhost_worker? + * Not sure it's worth it, it's straight-forward enough. */ + sock = rcu_dereference_check(vq->private_data, 1); if (!sock) return; @@ -142,7 +142,6 @@ static void handle_tx(struct vhost_net *net) return; } - use_mm(net->dev.mm); mutex_lock(&vq->mutex); vhost_disable_notify(vq); @@ -207,7 +206,6 @@ static void handle_tx(struct vhost_net *net) } mutex_unlock(&vq->mutex); - unuse_mm(net->dev.mm); } static int peek_head_len(struct sock *sk) @@ -312,7 +310,6 @@ static void handle_rx_big(struct vhost_net *net) if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) return; - use_mm(net->dev.mm); mutex_lock(&vq->mutex); vhost_disable_notify(vq); hdr_size = vq->vhost_hlen; @@ -391,7 +388,6 @@ static void handle_rx_big(struct vhost_net *net) } mutex_unlock(&vq->mutex); - unuse_mm(net->dev.mm); } /* Expects to be always run from workqueue - which acts as @@ -423,7 +419,6 @@ static void handle_rx_mergeable(struct vhost_net *net) if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) return; - use_mm(net->dev.mm); mutex_lock(&vq->mutex); vhost_disable_notify(vq); vhost_hlen = vq->vhost_hlen; @@ -458,7 +453,7 @@ static void handle_rx_mergeable(struct vhost_net *net) move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); else /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: - * needed because sendmsg can modify msg_iov. */ + * needed because recvmsg can modify msg_iov. */ copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); msg.msg_iovlen = in; err = sock->ops->recvmsg(NULL, sock, &msg, @@ -500,7 +495,6 @@ static void handle_rx_mergeable(struct vhost_net *net) } mutex_unlock(&vq->mutex); - unuse_mm(net->dev.mm); } static void handle_rx(struct vhost_net *net) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c new file mode 100644 index 00000000000..099f30230d0 --- /dev/null +++ b/drivers/vhost/test.c @@ -0,0 +1,320 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * test virtio server in host kernel. + */ + +#include <linux/compat.h> +#include <linux/eventfd.h> +#include <linux/vhost.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/workqueue.h> +#include <linux/rcupdate.h> +#include <linux/file.h> +#include <linux/slab.h> + +#include "test.h" +#include "vhost.c" + +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. */ +#define VHOST_TEST_WEIGHT 0x80000 + +enum { + VHOST_TEST_VQ = 0, + VHOST_TEST_VQ_MAX = 1, +}; + +struct vhost_test { + struct vhost_dev dev; + struct vhost_virtqueue vqs[VHOST_TEST_VQ_MAX]; +}; + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_vq(struct vhost_test *n) +{ + struct vhost_virtqueue *vq = &n->dev.vqs[VHOST_TEST_VQ]; + unsigned out, in; + int head; + size_t len, total_len = 0; + void *private; + + private = rcu_dereference_check(vq->private_data, 1); + if (!private) + return; + + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + + for (;;) { + head = vhost_get_vq_desc(&n->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + NULL, NULL); + /* On error, stop handling until the next kick. */ + if (unlikely(head < 0)) + break; + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (head == vq->num) { + if (unlikely(vhost_enable_notify(vq))) { + vhost_disable_notify(vq); + continue; + } + break; + } + if (in) { + vq_err(vq, "Unexpected descriptor format for TX: " + "out %d, int %d\n", out, in); + break; + } + len = iov_length(vq->iov, out); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected 0 len for TX\n"); + break; + } + vhost_add_used_and_signal(&n->dev, vq, head, 0); + total_len += len; + if (unlikely(total_len >= VHOST_TEST_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); +} + +static void handle_vq_kick(struct vhost_work *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + poll.work); + struct vhost_test *n = container_of(vq->dev, struct vhost_test, dev); + + handle_vq(n); +} + +static int vhost_test_open(struct inode *inode, struct file *f) +{ + struct vhost_test *n = kmalloc(sizeof *n, GFP_KERNEL); + struct vhost_dev *dev; + int r; + + if (!n) + return -ENOMEM; + + dev = &n->dev; + n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; + r = vhost_dev_init(dev, n->vqs, VHOST_TEST_VQ_MAX); + if (r < 0) { + kfree(n); + return r; + } + + f->private_data = n; + + return 0; +} + +static void *vhost_test_stop_vq(struct vhost_test *n, + struct vhost_virtqueue *vq) +{ + void *private; + + mutex_lock(&vq->mutex); + private = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + return private; +} + +static void vhost_test_stop(struct vhost_test *n, void **privatep) +{ + *privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ); +} + +static void vhost_test_flush_vq(struct vhost_test *n, int index) +{ + vhost_poll_flush(&n->dev.vqs[index].poll); +} + +static void vhost_test_flush(struct vhost_test *n) +{ + vhost_test_flush_vq(n, VHOST_TEST_VQ); +} + +static int vhost_test_release(struct inode *inode, struct file *f) +{ + struct vhost_test *n = f->private_data; + void *private; + + vhost_test_stop(n, &private); + vhost_test_flush(n); + vhost_dev_cleanup(&n->dev); + /* We do an extra flush before freeing memory, + * since jobs can re-queue themselves. */ + vhost_test_flush(n); + kfree(n); + return 0; +} + +static long vhost_test_run(struct vhost_test *n, int test) +{ + void *priv, *oldpriv; + struct vhost_virtqueue *vq; + int r, index; + + if (test < 0 || test > 1) + return -EINVAL; + + mutex_lock(&n->dev.mutex); + r = vhost_dev_check_owner(&n->dev); + if (r) + goto err; + + for (index = 0; index < n->dev.nvqs; ++index) { + /* Verify that ring has been setup correctly. */ + if (!vhost_vq_access_ok(&n->vqs[index])) { + r = -EFAULT; + goto err; + } + } + + for (index = 0; index < n->dev.nvqs; ++index) { + vq = n->vqs + index; + mutex_lock(&vq->mutex); + priv = test ? n : NULL; + + /* start polling new socket */ + oldpriv = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); + rcu_assign_pointer(vq->private_data, priv); + + mutex_unlock(&vq->mutex); + + if (oldpriv) { + vhost_test_flush_vq(n, index); + } + } + + mutex_unlock(&n->dev.mutex); + return 0; + +err: + mutex_unlock(&n->dev.mutex); + return r; +} + +static long vhost_test_reset_owner(struct vhost_test *n) +{ + void *priv = NULL; + long err; + mutex_lock(&n->dev.mutex); + err = vhost_dev_check_owner(&n->dev); + if (err) + goto done; + vhost_test_stop(n, &priv); + vhost_test_flush(n); + err = vhost_dev_reset_owner(&n->dev); +done: + mutex_unlock(&n->dev.mutex); + return err; +} + +static int vhost_test_set_features(struct vhost_test *n, u64 features) +{ + mutex_lock(&n->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&n->dev)) { + mutex_unlock(&n->dev.mutex); + return -EFAULT; + } + n->dev.acked_features = features; + smp_wmb(); + vhost_test_flush(n); + mutex_unlock(&n->dev.mutex); + return 0; +} + +static long vhost_test_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_test *n = f->private_data; + void __user *argp = (void __user *)arg; + u64 __user *featurep = argp; + int test; + u64 features; + int r; + switch (ioctl) { + case VHOST_TEST_RUN: + if (copy_from_user(&test, argp, sizeof test)) + return -EFAULT; + return vhost_test_run(n, test); + case VHOST_GET_FEATURES: + features = VHOST_FEATURES; + if (copy_to_user(featurep, &features, sizeof features)) + return -EFAULT; + return 0; + case VHOST_SET_FEATURES: + if (copy_from_user(&features, featurep, sizeof features)) + return -EFAULT; + if (features & ~VHOST_FEATURES) + return -EOPNOTSUPP; + return vhost_test_set_features(n, features); + case VHOST_RESET_OWNER: + return vhost_test_reset_owner(n); + default: + mutex_lock(&n->dev.mutex); + r = vhost_dev_ioctl(&n->dev, ioctl, arg); + vhost_test_flush(n); + mutex_unlock(&n->dev.mutex); + return r; + } +} + +#ifdef CONFIG_COMPAT +static long vhost_test_compat_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + return vhost_test_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations vhost_test_fops = { + .owner = THIS_MODULE, + .release = vhost_test_release, + .unlocked_ioctl = vhost_test_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_test_compat_ioctl, +#endif + .open = vhost_test_open, + .llseek = noop_llseek, +}; + +static struct miscdevice vhost_test_misc = { + MISC_DYNAMIC_MINOR, + "vhost-test", + &vhost_test_fops, +}; + +static int vhost_test_init(void) +{ + return misc_register(&vhost_test_misc); +} +module_init(vhost_test_init); + +static void vhost_test_exit(void) +{ + misc_deregister(&vhost_test_misc); +} +module_exit(vhost_test_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael S. Tsirkin"); +MODULE_DESCRIPTION("Host kernel side for virtio simulator"); diff --git a/drivers/vhost/test.h b/drivers/vhost/test.h new file mode 100644 index 00000000000..1fef5df8215 --- /dev/null +++ b/drivers/vhost/test.h @@ -0,0 +1,7 @@ +#ifndef LINUX_VHOST_TEST_H +#define LINUX_VHOST_TEST_H + +/* Start a given test on the virtio null device. 0 stops all tests. */ +#define VHOST_TEST_RUN _IOW(VHOST_VIRTIO, 0x31, int) + +#endif diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 94701ff3a23..ade0568c07a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -15,6 +15,7 @@ #include <linux/vhost.h> #include <linux/virtio_net.h> #include <linux/mm.h> +#include <linux/mmu_context.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/rcupdate.h> @@ -29,8 +30,6 @@ #include <linux/if_packet.h> #include <linux/if_arp.h> -#include <net/sock.h> - #include "vhost.h" enum { @@ -98,22 +97,26 @@ void vhost_poll_stop(struct vhost_poll *poll) remove_wait_queue(poll->wqh, &poll->wait); } +static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, + unsigned seq) +{ + int left; + spin_lock_irq(&dev->work_lock); + left = seq - work->done_seq; + spin_unlock_irq(&dev->work_lock); + return left <= 0; +} + static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) { unsigned seq; - int left; int flushing; spin_lock_irq(&dev->work_lock); seq = work->queue_seq; work->flushing++; spin_unlock_irq(&dev->work_lock); - wait_event(work->done, ({ - spin_lock_irq(&dev->work_lock); - left = seq - work->done_seq <= 0; - spin_unlock_irq(&dev->work_lock); - left; - })); + wait_event(work->done, vhost_work_seq_done(dev, work, seq)); spin_lock_irq(&dev->work_lock); flushing = --work->flushing; spin_unlock_irq(&dev->work_lock); @@ -157,7 +160,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->avail_idx = 0; vq->last_used_idx = 0; vq->used_flags = 0; - vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; vq->vhost_hlen = 0; @@ -178,6 +180,8 @@ static int vhost_worker(void *data) struct vhost_work *work = NULL; unsigned uninitialized_var(seq); + use_mm(dev->mm); + for (;;) { /* mb paired w/ kthread_stop */ set_current_state(TASK_INTERRUPTIBLE); @@ -192,7 +196,7 @@ static int vhost_worker(void *data) if (kthread_should_stop()) { spin_unlock_irq(&dev->work_lock); __set_current_state(TASK_RUNNING); - return 0; + break; } if (!list_empty(&dev->work_list)) { work = list_first_entry(&dev->work_list, @@ -210,6 +214,8 @@ static int vhost_worker(void *data) schedule(); } + unuse_mm(dev->mm); + return 0; } /* Helper to allocate iovec buffers for all vqs. */ @@ -402,15 +408,14 @@ void vhost_dev_cleanup(struct vhost_dev *dev) kfree(rcu_dereference_protected(dev->memory, lockdep_is_held(&dev->mutex))); RCU_INIT_POINTER(dev->memory, NULL); - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; - WARN_ON(!list_empty(&dev->work_list)); if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; } + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; } static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) @@ -881,14 +886,15 @@ static int set_bit_to_user(int nr, void __user *addr) static int log_write(void __user *log_base, u64 write_address, u64 write_length) { + u64 write_page = write_address / VHOST_PAGE_SIZE; int r; if (!write_length) return 0; - write_address /= VHOST_PAGE_SIZE; + write_length += write_address % VHOST_PAGE_SIZE; for (;;) { u64 base = (u64)(unsigned long)log_base; - u64 log = base + write_address / 8; - int bit = write_address % 8; + u64 log = base + write_page / 8; + int bit = write_page % 8; if ((u64)(unsigned long)log != log) return -EFAULT; r = set_bit_to_user(bit, (void __user *)(unsigned long)log); @@ -897,7 +903,7 @@ static int log_write(void __user *log_base, if (write_length <= VHOST_PAGE_SIZE) break; write_length -= VHOST_PAGE_SIZE; - write_address += VHOST_PAGE_SIZE; + write_page += 1; } return r; } @@ -1092,7 +1098,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(get_user(vq->avail_idx, &vq->avail->idx))) { + if (unlikely(__get_user(vq->avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1113,8 +1119,8 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(get_user(head, - &vq->avail->ring[last_avail_idx % vq->num]))) { + if (unlikely(__get_user(head, + &vq->avail->ring[last_avail_idx % vq->num]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, &vq->avail->ring[last_avail_idx % vq->num]); @@ -1213,17 +1219,17 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) /* The virtqueue contains a ring of used buffers. Get a pointer to the * next entry in that used ring. */ used = &vq->used->ring[vq->last_used_idx % vq->num]; - if (put_user(head, &used->id)) { + if (__put_user(head, &used->id)) { vq_err(vq, "Failed to write used id"); return -EFAULT; } - if (put_user(len, &used->len)) { + if (__put_user(len, &used->len)) { vq_err(vq, "Failed to write used len"); return -EFAULT; } /* Make sure buffer is written before we update index. */ smp_wmb(); - if (put_user(vq->last_used_idx + 1, &vq->used->idx)) { + if (__put_user(vq->last_used_idx + 1, &vq->used->idx)) { vq_err(vq, "Failed to increment used idx"); return -EFAULT; } @@ -1255,7 +1261,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, start = vq->last_used_idx % vq->num; used = vq->used->ring + start; - if (copy_to_user(used, heads, count * sizeof *used)) { + if (__copy_to_user(used, heads, count * sizeof *used)) { vq_err(vq, "Failed to write used"); return -EFAULT; } @@ -1316,7 +1322,7 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) * interrupts. */ smp_mb(); - if (get_user(flags, &vq->avail->flags)) { + if (__get_user(flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return; } @@ -1367,7 +1373,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); - r = get_user(avail_idx, &vq->avail->idx); + r = __get_user(avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 073d06ae091..2af44b7b1f3 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -102,7 +102,7 @@ struct vhost_virtqueue { * flush the vhost_work instead of synchronize_rcu. Therefore readers do * not need to call rcu_read_lock/rcu_read_unlock: the beginning of * vhost_work execution acts instead of rcu_read_lock() and the end of - * vhost_work execution acts instead of rcu_read_lock(). + * vhost_work execution acts instead of rcu_read_unlock(). * Writers use virtqueue mutex. */ void __rcu *private_data; /* Log write descriptors */ |