diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 12:37:27 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 12:37:27 -0800 |
commit | bcf8a3dfcb274cf6654a19e12e244f3af8c0d355 (patch) | |
tree | f1d0e0f36c0575a9202750aff65ba17ce91bc437 | |
parent | 61bd5e5683244a564ecfe31c73575ee0bc708ccc (diff) | |
parent | b6c96c0214138186f495e3ee73737c6fc5e4efa2 (diff) |
Merge tag 'to-linus' of git://github.com/rustyrussell/linux
* tag 'to-linus' of git://github.com/rustyrussell/linux: (24 commits)
lguest: Make sure interrupt is allocated ok by lguest_setup_irq
lguest: move the lguest tool to the tools directory
lguest: switch segment-voodoo-numbers to readable symbols
virtio: balloon: Add freeze, restore handlers to support S4
virtio: balloon: Move vq initialization into separate function
virtio: net: Add freeze, restore handlers to support S4
virtio: net: Move vq and vq buf removal into separate function
virtio: net: Move vq initialization into separate function
virtio: blk: Add freeze, restore handlers to support S4
virtio: blk: Move vq initialization to separate function
virtio: console: Disable callbacks for virtqueues at start of S4 freeze
virtio: console: Add freeze and restore handlers to support S4
virtio: console: Move vq and vq buf removal into separate functions
virtio: pci: add PM notification handlers for restore, freeze, thaw, poweroff
virtio: pci: switch to new PM API
virtio_blk: fix config handler race
virtio: add debugging if driver doesn't kick.
virtio: expose added descriptors immediately.
virtio: avoid modulus operation.
virtio: support unlocked queue kick
...
23 files changed, 757 insertions, 247 deletions
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index cf4603ba866..642d8805bc1 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -856,18 +856,23 @@ static void __init lguest_init_IRQ(void) } /* - * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so - * rather than set them in lguest_init_IRQ we are called here every time an - * lguest device needs an interrupt. - * - * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should - * pass that up! + * Interrupt descriptors are allocated as-needed, but low-numbered ones are + * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it + * tells us the irq is already used: other errors (ie. ENOMEM) we take + * seriously. */ -void lguest_setup_irq(unsigned int irq) +int lguest_setup_irq(unsigned int irq) { - irq_alloc_desc_at(irq, 0); + int err; + + /* Returns -ve error or vector number. */ + err = irq_alloc_desc_at(irq, 0); + if (err < 0 && err != -EEXIST) + return err; + irq_set_chip_and_handler_name(irq, &lguest_irq_controller, handle_level_irq, "level"); + return 0; } /* diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4d0b70adf5f..ffd5ca91929 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -4,6 +4,7 @@ #include <linux/blkdev.h> #include <linux/hdreg.h> #include <linux/module.h> +#include <linux/mutex.h> #include <linux/virtio.h> #include <linux/virtio_blk.h> #include <linux/scatterlist.h> @@ -36,6 +37,12 @@ struct virtio_blk /* Process context for config space updates */ struct work_struct config_work; + /* Lock for config space updates */ + struct mutex config_lock; + + /* enable config space updates */ + bool config_enable; + /* What host tells us, plus 2 for header & tailer. */ unsigned int sg_elems; @@ -172,7 +179,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, } } - if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { + if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) { mempool_free(vbr, vblk->pool); return false; } @@ -318,6 +325,10 @@ static void virtblk_config_changed_work(struct work_struct *work) char cap_str_2[10], cap_str_10[10]; u64 capacity, size; + mutex_lock(&vblk->config_lock); + if (!vblk->config_enable) + goto done; + /* Host must always specify the capacity. */ vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), &capacity, sizeof(capacity)); @@ -340,6 +351,8 @@ static void virtblk_config_changed_work(struct work_struct *work) cap_str_10, cap_str_2); set_capacity(vblk->disk, capacity); +done: + mutex_unlock(&vblk->config_lock); } static void virtblk_config_changed(struct virtio_device *vdev) @@ -349,6 +362,18 @@ static void virtblk_config_changed(struct virtio_device *vdev) queue_work(virtblk_wq, &vblk->config_work); } +static int init_vq(struct virtio_blk *vblk) +{ + int err = 0; + + /* We expect one virtqueue, for output. */ + vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests"); + if (IS_ERR(vblk->vq)) + err = PTR_ERR(vblk->vq); + + return err; +} + static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -388,14 +413,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->vdev = vdev; vblk->sg_elems = sg_elems; sg_init_table(vblk->sg, vblk->sg_elems); + mutex_init(&vblk->config_lock); INIT_WORK(&vblk->config_work, virtblk_config_changed_work); + vblk->config_enable = true; - /* We expect one virtqueue, for output. */ - vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); - if (IS_ERR(vblk->vq)) { - err = PTR_ERR(vblk->vq); + err = init_vq(vblk); + if (err) goto out_free_vblk; - } vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); if (!vblk->pool) { @@ -542,7 +566,10 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; int index = vblk->index; - flush_work(&vblk->config_work); + /* Prevent config work handler from accessing the device. */ + mutex_lock(&vblk->config_lock); + vblk->config_enable = false; + mutex_unlock(&vblk->config_lock); /* Nothing should be pending. */ BUG_ON(!list_empty(&vblk->reqs)); @@ -550,6 +577,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); + flush_work(&vblk->config_work); + del_gendisk(vblk->disk); blk_cleanup_queue(vblk->disk->queue); put_disk(vblk->disk); @@ -559,6 +588,46 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) ida_simple_remove(&vd_index_ida, index); } +#ifdef CONFIG_PM +static int virtblk_freeze(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + + /* Ensure we don't receive any more interrupts */ + vdev->config->reset(vdev); + + /* Prevent config work handler from accessing the device. */ + mutex_lock(&vblk->config_lock); + vblk->config_enable = false; + mutex_unlock(&vblk->config_lock); + + flush_work(&vblk->config_work); + + spin_lock_irq(vblk->disk->queue->queue_lock); + blk_stop_queue(vblk->disk->queue); + spin_unlock_irq(vblk->disk->queue->queue_lock); + blk_sync_queue(vblk->disk->queue); + + vdev->config->del_vqs(vdev); + return 0; +} + +static int virtblk_restore(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + int ret; + + vblk->config_enable = true; + ret = init_vq(vdev->priv); + if (!ret) { + spin_lock_irq(vblk->disk->queue->queue_lock); + blk_start_queue(vblk->disk->queue); + spin_unlock_irq(vblk->disk->queue->queue_lock); + } + return ret; +} +#endif + static const struct virtio_device_id id_table[] = { { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -584,6 +653,10 @@ static struct virtio_driver __refdata virtio_blk = { .probe = virtblk_probe, .remove = __devexit_p(virtblk_remove), .config_changed = virtblk_config_changed, +#ifdef CONFIG_PM + .freeze = virtblk_freeze, + .restore = virtblk_restore, +#endif }; static int __init init(void) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index fd699ccecf5..723725bbb96 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -47,7 +47,7 @@ static void register_buffer(u8 *buf, size_t size) sg_init_one(&sg, buf, size); /* There should always be room for one buffer. */ - if (virtqueue_add_buf(vq, &sg, 0, 1, buf) < 0) + if (virtqueue_add_buf(vq, &sg, 0, 1, buf, GFP_KERNEL) < 0) BUG(); virtqueue_kick(vq); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 8e3c46d67cb..b58b5618706 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -392,7 +392,7 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf) sg_init_one(sg, buf->buf, buf->size); - ret = virtqueue_add_buf(vq, sg, 0, 1, buf); + ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC); virtqueue_kick(vq); return ret; } @@ -457,7 +457,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id, vq = portdev->c_ovq; sg_init_one(sg, &cpkt, sizeof(cpkt)); - if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt) >= 0) { + if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) { virtqueue_kick(vq); while (!virtqueue_get_buf(vq, &len)) cpu_relax(); @@ -506,7 +506,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count, reclaim_consumed_buffers(port); sg_init_one(sg, in_buf, in_count); - ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf); + ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC); /* Tell Host to go! */ virtqueue_kick(out_vq); @@ -1271,6 +1271,20 @@ static void remove_port(struct kref *kref) kfree(port); } +static void remove_port_data(struct port *port) +{ + struct port_buffer *buf; + + /* Remove unused data this port might have received. */ + discard_port_data(port); + + reclaim_consumed_buffers(port); + + /* Remove buffers we queued up for the Host to send us data in. */ + while ((buf = virtqueue_detach_unused_buf(port->in_vq))) + free_buf(buf); +} + /* * Port got unplugged. Remove port from portdev's list and drop the * kref reference. If no userspace has this port opened, it will @@ -1278,8 +1292,6 @@ static void remove_port(struct kref *kref) */ static void unplug_port(struct port *port) { - struct port_buffer *buf; - spin_lock_irq(&port->portdev->ports_lock); list_del(&port->list); spin_unlock_irq(&port->portdev->ports_lock); @@ -1300,14 +1312,7 @@ static void unplug_port(struct port *port) hvc_remove(port->cons.hvc); } - /* Remove unused data this port might have received. */ - discard_port_data(port); - - reclaim_consumed_buffers(port); - - /* Remove buffers we queued up for the Host to send us data in. */ - while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + remove_port_data(port); /* * We should just assume the device itself has gone off -- @@ -1659,6 +1664,28 @@ static const struct file_operations portdev_fops = { .owner = THIS_MODULE, }; +static void remove_vqs(struct ports_device *portdev) +{ + portdev->vdev->config->del_vqs(portdev->vdev); + kfree(portdev->in_vqs); + kfree(portdev->out_vqs); +} + +static void remove_controlq_data(struct ports_device *portdev) +{ + struct port_buffer *buf; + unsigned int len; + + if (!use_multiport(portdev)) + return; + + while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) + free_buf(buf); + + while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) + free_buf(buf); +} + /* * Once we're further in boot, we get probed like any other virtio * device. @@ -1764,9 +1791,7 @@ free_vqs: /* The host might want to notify mgmt sw about device add failure */ __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, VIRTIO_CONSOLE_DEVICE_READY, 0); - vdev->config->del_vqs(vdev); - kfree(portdev->in_vqs); - kfree(portdev->out_vqs); + remove_vqs(portdev); free_chrdev: unregister_chrdev(portdev->chr_major, "virtio-portsdev"); free: @@ -1804,21 +1829,8 @@ static void virtcons_remove(struct virtio_device *vdev) * have to just stop using the port, as the vqs are going * away. */ - if (use_multiport(portdev)) { - struct port_buffer *buf; - unsigned int len; - - while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf); - - while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf); - } - - vdev->config->del_vqs(vdev); - kfree(portdev->in_vqs); - kfree(portdev->out_vqs); - + remove_controlq_data(portdev); + remove_vqs(portdev); kfree(portdev); } @@ -1832,6 +1844,68 @@ static unsigned int features[] = { VIRTIO_CONSOLE_F_MULTIPORT, }; +#ifdef CONFIG_PM +static int virtcons_freeze(struct virtio_device *vdev) +{ + struct ports_device *portdev; + struct port *port; + + portdev = vdev->priv; + + vdev->config->reset(vdev); + + virtqueue_disable_cb(portdev->c_ivq); + cancel_work_sync(&portdev->control_work); + /* + * Once more: if control_work_handler() was running, it would + * enable the cb as the last step. + */ + virtqueue_disable_cb(portdev->c_ivq); + remove_controlq_data(portdev); + + list_for_each_entry(port, &portdev->ports, list) { + virtqueue_disable_cb(port->in_vq); + virtqueue_disable_cb(port->out_vq); + /* + * We'll ask the host later if the new invocation has + * the port opened or closed. + */ + port->host_connected = false; + remove_port_data(port); + } + remove_vqs(portdev); + + return 0; +} + +static int virtcons_restore(struct virtio_device *vdev) +{ + struct ports_device *portdev; + struct port *port; + int ret; + + portdev = vdev->priv; + + ret = init_vqs(portdev); + if (ret) + return ret; + + if (use_multiport(portdev)) + fill_queue(portdev->c_ivq, &portdev->cvq_lock); + + list_for_each_entry(port, &portdev->ports, list) { + port->in_vq = portdev->in_vqs[port->id]; + port->out_vq = portdev->out_vqs[port->id]; + + fill_queue(port->in_vq, &port->inbuf_lock); + + /* Get port open/close status on the host */ + send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1); + } + return 0; +} +#endif + static struct virtio_driver virtio_console = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), @@ -1841,6 +1915,10 @@ static struct virtio_driver virtio_console = { .probe = virtcons_probe, .remove = virtcons_remove, .config_changed = config_intr, +#ifdef CONFIG_PM + .freeze = virtcons_freeze, + .restore = virtcons_restore, +#endif }; static int __init init(void) diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile index 8ac947c7e7c..c4197503900 100644 --- a/drivers/lguest/Makefile +++ b/drivers/lguest/Makefile @@ -18,7 +18,7 @@ Mastery: PREFIX=M Beer: @for f in Preparation Guest Drivers Launcher Host Switcher Mastery; do echo "{==- $$f -==}"; make -s $$f; done; echo "{==-==}" Preparation Preparation! Guest Drivers Launcher Host Switcher Mastery: - @sh ../../Documentation/virtual/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'` + @sh ../../tools/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'` Puppy: @clear @printf " __ \n (___()'\`;\n /, /\`\n \\\\\\\"--\\\\\\ \n" diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 595d7319701..9e8388efd88 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -241,7 +241,7 @@ static void lg_notify(struct virtqueue *vq) } /* An extern declaration inside a C file is bad form. Don't do it. */ -extern void lguest_setup_irq(unsigned int irq); +extern int lguest_setup_irq(unsigned int irq); /* * This routine finds the Nth virtqueue described in the configuration of @@ -292,17 +292,21 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* * OK, tell virtio_ring.c to set up a virtqueue now we know its size - * and we've got a pointer to its pages. + * and we've got a pointer to its pages. Note that we set weak_barriers + * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu + * barriers. */ - vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, - vdev, lvq->pages, lg_notify, callback, name); + vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev, + true, lvq->pages, lg_notify, callback, name); if (!vq) { err = -ENOMEM; goto unmap; } /* Make sure the interrupt is allocated. */ - lguest_setup_irq(lvq->config.irq); + err = lguest_setup_irq(lvq->config.irq); + if (err) + goto destroy_vring; /* * Tell the interrupt for this virtqueue to go to the virtio_ring @@ -315,7 +319,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vq); if (err) - goto destroy_vring; + goto free_desc; /* * Last of all we hook up our 'struct lguest_vq_info" to the @@ -324,6 +328,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, vq->priv = lvq; return vq; +free_desc: + irq_free_desc(lvq->config.irq); destroy_vring: vring_del_virtqueue(vq); unmap: diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index ede46581351..c4fb424dfdd 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -81,8 +81,8 @@ static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) * sometimes careless and leaves this as 0, even though it's * running at privilege level 1. If so, we fix it here. */ - if ((cpu->arch.gdt[i].b & 0x00006000) == 0) - cpu->arch.gdt[i].b |= (GUEST_PL << 13); + if (cpu->arch.gdt[i].dpl == 0) + cpu->arch.gdt[i].dpl |= GUEST_PL; /* * Each descriptor has an "accessed" bit. If we don't set it @@ -90,7 +90,7 @@ static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) * that entry into a segment register. But the GDT isn't * writable by the Guest, so bad things can happen. */ - cpu->arch.gdt[i].b |= 0x00000100; + cpu->arch.gdt[i].type |= 0x1; } } @@ -114,13 +114,19 @@ void setup_default_gdt_entries(struct lguest_ro_state *state) /* * The TSS segment refers to the TSS entry for this particular CPU. - * Forgive the magic flags: the 0x8900 means the entry is Present, it's - * privilege level 0 Available 386 TSS system segment, and the 0x67 - * means Saturn is eclipsed by Mercury in the twelfth house. */ - gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); - gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) - | ((tss >> 16) & 0x000000FF); + gdt[GDT_ENTRY_TSS].a = 0; + gdt[GDT_ENTRY_TSS].b = 0; + + gdt[GDT_ENTRY_TSS].limit0 = 0x67; + gdt[GDT_ENTRY_TSS].base0 = tss & 0xFFFF; + gdt[GDT_ENTRY_TSS].base1 = (tss >> 16) & 0xFF; + gdt[GDT_ENTRY_TSS].base2 = tss >> 24; + gdt[GDT_ENTRY_TSS].type = 0x9; /* 32-bit TSS (available) */ + gdt[GDT_ENTRY_TSS].p = 0x1; /* Entry is present */ + gdt[GDT_ENTRY_TSS].dpl = 0x0; /* Privilege level 0 */ + gdt[GDT_ENTRY_TSS].s = 0x0; /* system segment */ + } /* @@ -135,8 +141,8 @@ void setup_guest_gdt(struct lg_cpu *cpu) */ cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; - cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); - cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); + cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].dpl |= GUEST_PL; + cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].dpl |= GUEST_PL; } /*H:650 diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 76fe14efb2b..4880aa8b4c2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -370,7 +370,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp) skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len); - err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp); + err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp); if (err < 0) dev_kfree_skb(skb); @@ -415,8 +415,8 @@ static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp) /* chain first in list head */ first->private = (unsigned long)list; - err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2, - first, gfp); + err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2, + first, gfp); if (err < 0) give_pages(vi, first); @@ -434,7 +434,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp) sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE); - err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp); + err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp); if (err < 0) give_pages(vi, page); @@ -609,7 +609,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1; return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg, - 0, skb); + 0, skb, GFP_ATOMIC); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -767,7 +767,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd, sg_set_buf(&sg[i + 1], sg_virt(s), s->length); sg_set_buf(&sg[out + in - 1], &status, sizeof(status)); - BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi) < 0); + BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0); virtqueue_kick(vi->cvq); @@ -985,15 +985,38 @@ static void virtnet_config_changed(struct virtio_device *vdev) virtnet_update_status(vi); } +static int init_vqs(struct virtnet_info *vi) +{ + struct virtqueue *vqs[3]; + vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; + const char *names[] = { "input", "output", "control" }; + int nvqs, err; + + /* We expect two virtqueues, receive then send, + * and optionally control. */ + nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; + + err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names); + if (err) + return err; + + vi->rvq = vqs[0]; + vi->svq = vqs[1]; + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { + vi->cvq = vqs[2]; + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) + vi->dev->features |= NETIF_F_HW_VLAN_FILTER; + } + return 0; +} + static int virtnet_probe(struct virtio_device *vdev) { int err; struct net_device *dev; struct virtnet_info *vi; - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; - const char *names[] = { "input", "output", "control" }; - int nvqs; /* Allocate ourselves a network device with room for our info */ dev = alloc_etherdev(sizeof(struct virtnet_info)); @@ -1065,24 +1088,10 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; - /* We expect two virtqueues, receive then send, - * and optionally control. */ - nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; - - err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names); + err = init_vqs(vi); if (err) goto free_stats; - vi->rvq = vqs[0]; - vi->svq = vqs[1]; - - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { - vi->cvq = vqs[2]; - - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) - dev->features |= NETIF_F_HW_VLAN_FILTER; - } - err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -1144,27 +1153,73 @@ static void free_unused_bufs(struct virtnet_info *vi) BUG_ON(vi->num != 0); } -static void __devexit virtnet_remove(struct virtio_device *vdev) +static void remove_vq_common(struct virtnet_info *vi) { - struct virtnet_info *vi = vdev->priv; - - /* Stop all the virtqueues. */ - vdev->config->reset(vdev); - - unregister_netdev(vi->dev); + vi->vdev->config->reset(vi->vdev); /* Free unused buffers in both send and recv, if any. */ free_unused_bufs(vi); - vdev->config->del_vqs(vi->vdev); + vi->vdev->config->del_vqs(vi->vdev); while (vi->pages) __free_pages(get_a_page(vi, GFP_KERNEL), 0); +} + +static void __devexit virtnet_remove(struct virtio_device *vdev) +{ + struct virtnet_info *vi = vdev->priv; + + unregister_netdev(vi->dev); + + remove_vq_common(vi); free_percpu(vi->stats); free_netdev(vi->dev); } +#ifdef CONFIG_PM +static int virtnet_freeze(struct virtio_device *vdev) +{ + struct virtnet_info *vi = vdev->priv; + + virtqueue_disable_cb(vi->rvq); + virtqueue_disable_cb(vi->svq); + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) + virtqueue_disable_cb(vi->cvq); + + netif_device_detach(vi->dev); + cancel_delayed_work_sync(&vi->refill); + + if (netif_running(vi->dev)) + napi_disable(&vi->napi); + + remove_vq_common(vi); + + return 0; +} + +static int virtnet_restore(struct virtio_device *vdev) +{ + struct virtnet_info *vi = vdev->priv; + int err; + + err = init_vqs(vi); + if (err) + return err; + + if (netif_running(vi->dev)) + virtnet_napi_enable(vi); + + netif_device_attach(vi->dev); + + if (!try_fill_recv(vi, GFP_KERNEL)) + queue_delayed_work(system_nrt_wq, &vi->refill, 0); + + return 0; +} +#endif + static struct virtio_device_id id_table[] = { { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -1189,6 +1244,10 @@ static struct virtio_driver virtio_net_driver = { .probe = virtnet_probe, .remove = __devexit_p(virtnet_remove), .config_changed = virtnet_config_changed, +#ifdef CONFIG_PM + .freeze = virtnet_freeze, + .restore = virtnet_restore, +#endif }; static int __init init(void) diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 8af868bab20..7bc1955337e 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -198,7 +198,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, goto out; vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN, - vdev, (void *) config->address, + vdev, true, (void *) config->address, kvm_notify, callback, name); if (!vq) { err = -ENOMEM; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 94fd738a774..95aeedf198f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -1,4 +1,5 @@ -/* Virtio balloon implementation, inspired by Dor Loar and Marcelo +/* + * Virtio balloon implementation, inspired by Dor Laor and Marcelo * Tosatti's implementations. * * Copyright 2008 Rusty Russell IBM Corporation @@ -17,7 +18,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -//#define DEBUG + #include <linux/virtio.h> #include <linux/virtio_balloon.h> #include <linux/swap.h> @@ -87,7 +88,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) init_completion(&vb->acked); /* We should always be able to add one buffer to an empty queue. */ - if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0) + if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0) BUG(); virtqueue_kick(vq); @@ -149,7 +150,6 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) vb->num_pages--; } - /* * Note that if * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); @@ -220,7 +220,7 @@ static void stats_handle_request(struct virtio_balloon *vb) vq = vb->stats_vq; sg_init_one(&sg, vb->stats, sizeof(vb->stats)); - if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0) + if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0) BUG(); virtqueue_kick(vq); } @@ -275,32 +275,21 @@ static int balloon(void *_vballoon) return 0; } -static int virtballoon_probe(struct virtio_device *vdev) +static int init_vqs(struct virtio_balloon *vb) { - struct virtio_balloon *vb; struct virtqueue *vqs[3]; vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; const char *names[] = { "inflate", "deflate", "stats" }; int err, nvqs; - vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); - if (!vb) { - err = -ENOMEM; - goto out; - } - - INIT_LIST_HEAD(&vb->pages); - vb->num_pages = 0; - init_waitqueue_head(&vb->config_change); - vb->vdev = vdev; - vb->need_stats_update = 0; - - /* We expect two virtqueues: inflate and deflate, - * and optionally stat. */ + /* + * We expect two virtqueues: inflate and deflate, and + * optionally stat. + */ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names); + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); if (err) - goto out_free_vb; + return err; vb->inflate_vq = vqs[0]; vb->deflate_vq = vqs[1]; @@ -313,10 +302,34 @@ static int virtballoon_probe(struct virtio_device *vdev) * use it to signal us later. */ sg_init_one(&sg, vb->stats, sizeof vb->stats); - if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0) + if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL) + < 0) BUG(); virtqueue_kick(vb->stats_vq); } + return 0; +} + +static int virtballoon_probe(struct virtio_device *vdev) +{ + struct virtio_balloon *vb; + int err; + + vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); + if (!vb) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&vb->pages); + vb->num_pages = 0; + init_waitqueue_head(&vb->config_change); + vb->vdev = vdev; + vb->need_stats_update = 0; + + err = init_vqs(vb); + if (err) + goto out_free_vb; vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { @@ -351,6 +364,48 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev) kfree(vb); } +#ifdef CONFIG_PM +static int virtballoon_freeze(struct virtio_device *vdev) +{ + /* + * The kthread is already frozen by the PM core before this + * function is called. + */ + + /* Ensure we don't get any more requests from the host */ + vdev->config->reset(vdev); + vdev->config->del_vqs(vdev); + return 0; +} + +static int virtballoon_thaw(struct virtio_device *vdev) +{ + return init_vqs(vdev->priv); +} + +static int virtballoon_restore(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + struct page *page, *page2; + + /* We're starting from a clean slate */ + vb->num_pages = 0; + + /* + * If a request wasn't complete at the time of freezing, this + * could have been set. + */ + vb->need_stats_update = 0; + + /* We don't have these pages in the balloon anymore! */ + list_for_each_entry_safe(page, page2, &vb->pages, lru) { + list_del(&page->lru); + totalram_pages++; + } + return init_vqs(vdev->priv); +} +#endif + static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, @@ -365,6 +420,11 @@ static struct virtio_driver virtio_balloon_driver = { .probe = virtballoon_probe, .remove = __devexit_p(virtballoon_remove), .config_changed = virtballoon_changed, +#ifdef CONFIG_PM + .freeze = virtballoon_freeze, + .restore = virtballoon_restore, + .thaw = virtballoon_thaw, +#endif }; static int __init init(void) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 0269717436a..01d6dc250d5 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -310,8 +310,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); /* Create the vring */ - vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, - vdev, info->queue, vm_notify, callback, name); + vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev, + true, info->queue, vm_notify, callback, name); if (!vq) { err = -ENOMEM; goto error_new_virtqueue; diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index baabb7937ec..635e1efb379 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -55,6 +55,10 @@ struct virtio_pci_device unsigned msix_vectors; /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; + + /* Status saved during hibernate/restore */ + u8 saved_status; + /* Whether we have vector per vq */ bool per_vq_vectors; }; @@ -414,8 +418,8 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ - vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, - vdev, info->queue, vp_notify, callback, name); + vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev, + true, info->queue, vp_notify, callback, name); if (!vq) { err = -ENOMEM; goto out_activate_queue; @@ -716,19 +720,114 @@ static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) } #ifdef CONFIG_PM -static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state) +static int virtio_pci_suspend(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); + pci_save_state(pci_dev); pci_set_power_state(pci_dev, PCI_D3hot); return 0; } -static int virtio_pci_resume(struct pci_dev *pci_dev) +static int virtio_pci_resume(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); + pci_restore_state(pci_dev); pci_set_power_state(pci_dev, PCI_D0); return 0; } + +static int virtio_pci_freeze(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct virtio_driver *drv; + int ret; + + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + ret = 0; + vp_dev->saved_status = vp_get_status(&vp_dev->vdev); + if (drv && drv->freeze) + ret = drv->freeze(&vp_dev->vdev); + + if (!ret) + pci_disable_device(pci_dev); + return ret; +} + +static int restore_common(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + int ret; + + ret = pci_enable_device(pci_dev); + if (ret) + return ret; + pci_set_master(pci_dev); + vp_finalize_features(&vp_dev->vdev); + + return ret; +} + +static int virtio_pci_thaw(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct virtio_driver *drv; + int ret; + + ret = restore_common(dev); + if (ret) + return ret; + + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + if (drv && drv->thaw) + ret = drv->thaw(&vp_dev->vdev); + else if (drv && drv->restore) + ret = drv->restore(&vp_dev->vdev); + + /* Finally, tell the device we're all set */ + if (!ret) + vp_set_status(&vp_dev->vdev, vp_dev->saved_status); + + return ret; +} + +static int virtio_pci_restore(struct device *dev) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct virtio_driver *drv; + int ret; + + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + ret = restore_common(dev); + if (!ret && drv && drv->restore) + ret = drv->restore(&vp_dev->vdev); + + /* Finally, tell the device we're all set */ + if (!ret) + vp_set_status(&vp_dev->vdev, vp_dev->saved_status); + + return ret; +} + +static const struct dev_pm_ops virtio_pci_pm_ops = { + .suspend = virtio_pci_suspend, + .resume = virtio_pci_resume, + .freeze = virtio_pci_freeze, + .thaw = virtio_pci_thaw, + .restore = virtio_pci_restore, + .poweroff = virtio_pci_suspend, +}; #endif static struct pci_driver virtio_pci_driver = { @@ -737,8 +836,7 @@ static struct pci_driver virtio_pci_driver = { .probe = virtio_pci_probe, .remove = __devexit_p(virtio_pci_remove), #ifdef CONFIG_PM - .suspend = virtio_pci_suspend, - .resume = virtio_pci_resume, + .driver.pm = &virtio_pci_pm_ops, #endif }; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c7a2c208f6e..79e1b292c03 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -22,23 +22,27 @@ #include <linux/device.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/hrtimer.h> /* virtio guest is communicating with a virtual "device" that actually runs on * a host processor. Memory barriers are used to control SMP effects. */ #ifdef CONFIG_SMP /* Where possible, use SMP barriers which are more lightweight than mandatory * barriers, because mandatory barriers control MMIO effects on accesses - * through relaxed memory I/O windows (which virtio does not use). */ -#define virtio_mb() smp_mb() -#define virtio_rmb() smp_rmb() -#define virtio_wmb() smp_wmb() + * through relaxed memory I/O windows (which virtio-pci does not use). */ +#define virtio_mb(vq) \ + do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0) +#define virtio_rmb(vq) \ + do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) +#define virtio_wmb(vq) \ + do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) #else /* We must force memory ordering even if guest is UP since host could be * running on another CPU, but SMP barriers are defined to barrier() in that * configuration. So fall back to mandatory barriers instead. */ -#define virtio_mb() mb() -#define virtio_rmb() rmb() -#define virtio_wmb() wmb() +#define virtio_mb(vq) mb() +#define virtio_rmb(vq) rmb() +#define virtio_wmb(vq) wmb() #endif #ifdef DEBUG @@ -77,6 +81,9 @@ struct vring_virtqueue /* Actual memory layout for this queue */ struct vring vring; + /* Can we use weak barriers? */ + bool weak_barriers; + /* Other side has made a mess, don't try any more. */ bool broken; @@ -102,6 +109,10 @@ struct vring_virtqueue #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; #endif /* Tokens for callbacks. */ @@ -160,12 +171,29 @@ static int vring_add_indirect(struct vring_virtqueue *vq, return head; } -int virtqueue_add_buf_gfp(struct virtqueue *_vq, - struct scatterlist sg[], - unsigned int out, - unsigned int in, - void *data, - gfp_t gfp) +/** + * virtqueue_add_buf - expose buffer to other end + * @vq: the struct virtqueue we're talking about. + * @sg: the description of the buffer(s). + * @out_num: the number of sg readable by other side + * @in_num: the number of sg which are writable (after readable ones) + * @data: the token identifying the buffer. + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns remaining capacity of queue or a negative error + * (ie. ENOSPC). Note that it only really makes sense to treat all + * positive return values as "available": indirect buffers mean that + * we can put an entire sg[] array inside a single queue entry. + */ +int virtqueue_add_buf(struct virtqueue *_vq, + struct scatterlist sg[], + unsigned int out, + unsigned int in, + void *data, + gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i, avail, uninitialized_var(prev); @@ -175,6 +203,19 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq, BUG_ON(data == NULL); +#ifdef DEBUG + { + ktime_t now = ktime_get(); + + /* No kick or get, with .1 second between? Warn. */ + if (vq->last_add_time_valid) + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) + > 100); + vq->last_add_time = now; + vq->last_add_time_valid = true; + } +#endif + /* If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold */ if (vq->indirect && (out + in) > 1 && vq->num_free) { @@ -227,40 +268,102 @@ add_head: vq->data[head] = data; /* Put entry in available array (but don't update avail->idx until they - * do sync). FIXME: avoid modulus here? */ - avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; + * do sync). */ + avail = (vq->vring.avail->idx & (vq->vring.num-1)); vq->vring.avail->ring[avail] = head; + /* Descriptors and available array need to be set before we expose the + * new available array entries. */ + virtio_wmb(vq); + vq->vring.avail->idx++; + vq->num_added++; + + /* This is very unlikely, but theoretically possible. Kick + * just in case. */ + if (unlikely(vq->num_added == (1 << 16) - 1)) + virtqueue_kick(_vq); + pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); return vq->num_free; } -EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); +EXPORT_SYMBOL_GPL(virtqueue_add_buf); -void virtqueue_kick(struct virtqueue *_vq) +/** + * virtqueue_kick_prepare - first half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * Instead of virtqueue_kick(), you can do: + * if (virtqueue_kick_prepare(vq)) + * virtqueue_notify(vq); + * + * This is sometimes useful because the virtqueue_kick_prepare() needs + * to be serialized, but the actual virtqueue_notify() call does not. + */ +bool virtqueue_kick_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 new, old; + bool needs_kick; + START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ - virtio_wmb(); + virtio_wmb(vq); - old = vq->vring.avail->idx; - new = vq->vring.avail->idx = old + vq->num_added; + old = vq->vring.avail->idx - vq->num_added; + new = vq->vring.avail->idx; vq->num_added = 0; - /* Need to update avail index before checking if we should notify */ - virtio_mb(); - - if (vq->event ? - vring_need_event(vring_avail_event(&vq->vring), new, old) : - !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) - /* Prod other side to tell it about changes. */ - vq->notify(&vq->vq); +#ifdef DEBUG + if (vq->last_add_time_valid) { + WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), + vq->last_add_time)) > 100); + } + vq->last_add_time_valid = false; +#endif + if (vq->event) { + needs_kick = vring_need_event(vring_avail_event(&vq->vring), + new, old); + } else { + needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY); + } END_USE(vq); + return needs_kick; +} +EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); + +/** + * virtqueue_notify - second half of split virtqueue_kick call. + * @vq: the struct virtqueue + * + * This does not need to be serialized. + */ +void virtqueue_notify(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + /* Prod other side to tell it about changes. */ + vq->notify(_vq); +} +EXPORT_SYMBOL_GPL(virtqueue_notify); + +/** + * virtqueue_kick - update after add_buf + * @vq: the struct virtqueue + * + * After one or more virtqueue_add_buf calls, invoke this to kick + * the other side. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +void virtqueue_kick(struct virtqueue *vq) +{ + if (virtqueue_kick_prepare(vq)) + virtqueue_notify(vq); } EXPORT_SYMBOL_GPL(virtqueue_kick); @@ -294,11 +397,28 @@ static inline bool more_used(const struct vring_virtqueue *vq) return vq->last_used_idx != vq->vring.used->idx; } +/** + * virtqueue_get_buf - get the next used buffer + * @vq: the struct virtqueue we're talking about. + * @len: the length written into the buffer + * + * If the driver wrote data into the buffer, @len will be set to the + * amount written. This means you don't need to clear the buffer + * beforehand to ensure there's no data leakage in the case of short + * writes. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + * + * Returns NULL if there are no used buffers, or the "data" token + * handed to virtqueue_add_buf(). + */ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { struct vring_virtqueue *vq = to_vvq(_vq); void *ret; unsigned int i; + u16 last_used; START_USE(vq); @@ -314,10 +434,11 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) } /* Only get used array entries after they have been exposed by host. */ - virtio_rmb(); + virtio_rmb(vq); - i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; - *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; + last_used = (vq->last_used_idx & (vq->vring.num - 1)); + i = vq->vring.used->ring[last_used].id; + *len = vq->vring.used->ring[last_used].len; if (unlikely(i >= vq->vring.num)) { BAD_RING(vq, "id %u out of range\n", i); @@ -337,14 +458,27 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) * the read in the next get_buf call. */ if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(); + virtio_mb(vq); } +#ifdef DEBUG + vq->last_add_time_valid = false; +#endif + END_USE(vq); return ret; } EXPORT_SYMBOL_GPL(virtqueue_get_buf); +/** + * virtqueue_disable_cb - disable callbacks + * @vq: the struct virtqueue we're talking about. + * + * Note that this is not necessarily synchronous, hence unreliable and only + * useful as an optimization. + * + * Unlike other operations, this need not be serialized. + */ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -353,6 +487,17 @@ void virtqueue_disable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); +/** + * virtqueue_enable_cb - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns "false" if there are pending + * buffers in the queue, to detect a possible race between the driver + * checking for more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ bool virtqueue_enable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -366,7 +511,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) * entry. Always do both to keep code simple. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(); + virtio_mb(vq); if (unlikely(more_used(vq))) { END_USE(vq); return false; @@ -377,6 +522,19 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +/** + * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -393,7 +551,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) /* TODO: tune this threshold */ bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; vring_used_event(&vq->vring) = vq->last_used_idx + bufs; - virtio_mb(); + virtio_mb(vq); if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { END_USE(vq); return false; @@ -404,6 +562,14 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); +/** + * virtqueue_detach_unused_buf - detach first unused buffer + * @vq: the struct virtqueue we're talking about. + * + * Returns NULL or the "data" token handed to virtqueue_add_buf(). + * This is not valid on an active queue; it is useful only for device + * shutdown. + */ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -453,6 +619,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt); struct virtqueue *vring_new_virtqueue(unsigned int num, unsigned int vring_align, struct virtio_device *vdev, + bool weak_barriers, void *pages, void (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), @@ -476,12 +643,14 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->vq.vdev = vdev; vq->vq.name = name; vq->notify = notify; + vq->weak_barriers = weak_barriers; vq->broken = false; vq->last_used_idx = 0; vq->num_added = 0; list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; + vq->last_add_time_valid = false; #endif vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); @@ -530,7 +699,13 @@ void vring_transport_features(struct virtio_device *vdev) } EXPORT_SYMBOL_GPL(vring_transport_features); -/* return the size of the vring within the virtqueue */ +/** + * virtqueue_get_vring_size - return the size of the virtqueue's vring + * @vq: the struct virtqueue containing the vring of interest. + * + * Returns the size of the vring. This is mainly used for boasting to + * userspace. Unlike other operations, this need not be serialized. + */ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) { diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4c069d8bd74..d0018d27c28 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -25,70 +25,18 @@ struct virtqueue { void *priv; }; -/** - * operations for virtqueue - * virtqueue_add_buf: expose buffer to other end - * vq: the struct virtqueue we're talking about. - * sg: the description of the buffer(s). - * out_num: the number of sg readable by other side - * in_num: the number of sg which are writable (after readable ones) - * data: the token identifying the buffer. - * gfp: how to do memory allocations (if necessary). - * Returns remaining capacity of queue (sg segments) or a negative error. - * virtqueue_kick: update after add_buf - * vq: the struct virtqueue - * After one or more add_buf calls, invoke this to kick the other side. - * virtqueue_get_buf: get the next used buffer - * vq: the struct virtqueue we're talking about. - * len: the length written into the buffer - * Returns NULL or the "data" token handed to add_buf. - * virtqueue_disable_cb: disable callbacks - * vq: the struct virtqueue we're talking about. - * Note that this is not necessarily synchronous, hence unreliable and only - * useful as an optimization. - * virtqueue_enable_cb: restart callbacks after disable_cb. - * vq: the struct virtqueue we're talking about. - * This re-enables callbacks; it returns "false" if there are pending - * buffers in the queue, to detect a possible race between the driver - * checking for more work, and enabling callbacks. - * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. - * vq: the struct virtqueue we're talking about. - * This re-enables callbacks but hints to the other side to delay - * interrupts until most of the available buffers have been processed; - * it returns "false" if there are many pending buffers in the queue, - * to detect a possible race between the driver checking for more work, - * and enabling callbacks. - * virtqueue_detach_unused_buf: detach first unused buffer - * vq: the struct virtqueue we're talking about. - * Returns NULL or the "data" token handed to add_buf - * virtqueue_get_vring_size: return the size of the virtqueue's vring - * vq: the struct virtqueue containing the vring of interest. - * Returns the size of the vring. - * - * Locking rules are straightforward: the driver is responsible for - * locking. No two operations may be invoked simultaneously, with the exception - * of virtqueue_disable_cb. - * - * All operations can be called in any context. - */ +int virtqueue_add_buf(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data, + gfp_t gfp); -int virtqueue_add_buf_gfp(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data, - gfp_t gfp); +void virtqueue_kick(struct virtqueue *vq); -static inline int virtqueue_add_buf(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data) -{ - return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC); -} +bool virtqueue_kick_prepare(struct virtqueue *vq); -void virtqueue_kick(struct virtqueue *vq); +void virtqueue_notify(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); @@ -146,6 +94,11 @@ struct virtio_driver { int (*probe)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); void (*config_changed)(struct virtio_device *dev); +#ifdef CONFIG_PM + int (*freeze)(struct virtio_device *dev); + int (*thaw)(struct virtio_device *dev); + int (*restore)(struct virtio_device *dev); +#endif }; int register_virtio_driver(struct virtio_driver *drv); diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 36be0f6e18a..e338730c266 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -168,6 +168,7 @@ struct virtqueue; struct virtqueue *vring_new_virtqueue(unsigned int num, unsigned int vring_align, struct virtio_device *vdev, + bool weak_barriers, void *pages, void (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 330421e5471..3d432068f62 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -272,7 +272,8 @@ req_retry: in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { chan->ring_bufs_avail = 0; @@ -414,7 +415,8 @@ req_retry_pinned: in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, in_pages, in_nr_pages, uidata, inlen); - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { chan->ring_bufs_avail = 0; diff --git a/Documentation/virtual/lguest/.gitignore b/tools/lguest/.gitignore index 115587fd5f6..115587fd5f6 100644 --- a/Documentation/virtual/lguest/.gitignore +++ b/tools/lguest/.gitignore diff --git a/Documentation/virtual/lguest/Makefile b/tools/lguest/Makefile index 0ac34206f7a..0ac34206f7a 100644 --- a/Documentation/virtual/lguest/Makefile +++ b/tools/lguest/Makefile diff --git a/Documentation/virtual/lguest/extract b/tools/lguest/extract index 7730bb6e4b9..7730bb6e4b9 100644 --- a/Documentation/virtual/lguest/extract +++ b/tools/lguest/extract diff --git a/Documentation/virtual/lguest/lguest.c b/tools/lguest/lguest.c index c095d79cae7..f759f4f097c 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -49,7 +49,7 @@ #include <linux/virtio_rng.h> #include <linux/virtio_ring.h> #include <asm/bootparam.h> -#include "../../../include/linux/lguest_launcher.h" +#include "../../include/linux/lguest_launcher.h" /*L:110 * We can ignore the 43 include files we need for this program, but I do want * to draw attention to the use of kernel-style types. diff --git a/Documentation/virtual/lguest/lguest.txt b/tools/lguest/lguest.txt index bff0c554485..bff0c554485 100644 --- a/Documentation/virtual/lguest/lguest.txt +++ b/tools/lguest/lguest.txt diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 669bcdd4580..b4fbc91c41b 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -186,21 +186,12 @@ struct virtqueue { #endif /* Interfaces exported by virtio_ring. */ -int virtqueue_add_buf_gfp(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data, - gfp_t gfp); - -static inline int virtqueue_add_buf(struct virtqueue *vq, - struct scatterlist sg[], - unsigned int out_num, - unsigned int in_num, - void *data) -{ - return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC); -} +int virtqueue_add_buf(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data, + gfp_t gfp); void virtqueue_kick(struct virtqueue *vq); @@ -214,6 +205,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq); struct virtqueue *vring_new_virtqueue(unsigned int num, unsigned int vring_align, struct virtio_device *vdev, + bool weak_barriers, void *pages, void (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 74d3331bdaf..6bf95f99536 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info *dev, int num) assert(r >= 0); memset(info->ring, 0, vring_size(num, 4096)); vring_init(&info->vring, num, info->ring, 4096); - info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring, + info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, + true, info->ring, vq_notify, vq_callback, "test"); assert(info->vq); info->vq->priv = info; @@ -160,7 +161,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) if (started < bufs) { sg_init_one(&sl, dev->buf, dev->buf_size); r = virtqueue_add_buf(vq->vq, &sl, 1, 0, - dev->buf + started); + dev->buf + started, + GFP_ATOMIC); if (likely(r >= 0)) { ++started; virtqueue_kick(vq->vq); |