diff options
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/pci/Kconfig | 10 |
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 156 |
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 52 |
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 19 |
-rw-r--r-- | drivers/vfio/pci/vfio_pci_rdwr.c | 281 |
-rw-r--r-- | drivers/vfio/vfio.c | 86 |
6 files changed, 309 insertions, 295 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 5980758563e..c41b01e2b69 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -6,3 +6,13 @@ config VFIO_PCI use of PCI drivers using the VFIO framework. If you don't know what to do here, say N. + +config VFIO_PCI_VGA + bool "VFIO PCI support for VGA devices" + depends on VFIO_PCI && X86 && VGA_ARB + help + Support for VGA extension to VFIO PCI. This exposes an additional + region on VGA devices for accessing legacy VGA addresses used by + BIOS and generic video drivers. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6c119944bbb..8189cb6a86a 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -43,6 +43,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + ret = pci_enable_device(pdev); + if (ret) + return ret; + vdev->reset_works = (pci_reset_function(pdev) == 0); pci_save_state(pdev); vdev->pci_saved_state = pci_store_saved_state(pdev); @@ -51,8 +55,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) __func__, dev_name(&pdev->dev)); ret = vfio_config_init(vdev); - if (ret) - goto out; + if (ret) { + pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state); + pci_disable_device(pdev); + return ret; + } if (likely(!nointxmask)) vdev->pci_2_3 = pci_intx_mask_supported(pdev); @@ -77,24 +84,20 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; - ret = pci_enable_device(pdev); - if (ret) - goto out; - - return ret; +#ifdef CONFIG_VFIO_PCI_VGA + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vdev->has_vga = true; +#endif -out: - kfree(vdev->pci_saved_state); - vdev->pci_saved_state = NULL; - vfio_config_free(vdev); - return ret; + return 0; } static void vfio_pci_disable(struct vfio_pci_device *vdev) { + struct pci_dev *pdev = vdev->pdev; int bar; - pci_disable_device(vdev->pdev); + 
pci_disable_device(pdev); vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, @@ -104,22 +107,40 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vfio_config_free(vdev); - pci_reset_function(vdev->pdev); - - if (pci_load_and_free_saved_state(vdev->pdev, - &vdev->pci_saved_state) == 0) - pci_restore_state(vdev->pdev); - else - pr_info("%s: Couldn't reload %s saved state\n", - __func__, dev_name(&vdev->pdev->dev)); - for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { if (!vdev->barmap[bar]) continue; - pci_iounmap(vdev->pdev, vdev->barmap[bar]); - pci_release_selected_regions(vdev->pdev, 1 << bar); + pci_iounmap(pdev, vdev->barmap[bar]); + pci_release_selected_regions(pdev, 1 << bar); vdev->barmap[bar] = NULL; } + + /* + * If we have saved state, restore it. If we can reset the device, + * even better. Resetting with current state seems better than + * nothing, but saving and restoring current state without reset + * is just busy work. + */ + if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) { + pr_info("%s: Couldn't reload %s saved state\n", + __func__, dev_name(&pdev->dev)); + + if (!vdev->reset_works) + return; + + pci_save_state(pdev); + } + + /* + * Disable INTx and MSI, presumably to avoid spurious interrupts + * during reset. 
Stolen from pci_reset_function() + */ + pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); + + if (vdev->reset_works) + __pci_reset_function(pdev); + + pci_restore_state(pdev); } static void vfio_pci_release(void *device_data) @@ -269,6 +290,16 @@ static long vfio_pci_ioctl(void *device_data, info.flags = VFIO_REGION_INFO_FLAG_READ; break; } + case VFIO_PCI_VGA_REGION_INDEX: + if (!vdev->has_vga) + return -EINVAL; + + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0xc0000; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + + break; default: return -EINVAL; } @@ -327,15 +358,10 @@ static long vfio_pci_ioctl(void *device_data, hdr.count > vfio_pci_get_irq_count(vdev, hdr.index)) return -EINVAL; - data = kmalloc(hdr.count * size, GFP_KERNEL); - if (!data) - return -ENOMEM; - - if (copy_from_user(data, (void __user *)(arg + minsz), - hdr.count * size)) { - kfree(data); - return -EFAULT; - } + data = memdup_user((void __user *)(arg + minsz), + hdr.count * size); + if (IS_ERR(data)) + return PTR_ERR(data); } mutex_lock(&vdev->igate); @@ -355,52 +381,50 @@ static long vfio_pci_ioctl(void *device_data, return -ENOTTY; } -static ssize_t vfio_pci_read(void *device_data, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_pci_rw(void *device_data, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); struct vfio_pci_device *vdev = device_data; - struct pci_dev *pdev = vdev->pdev; if (index >= VFIO_PCI_NUM_REGIONS) return -EINVAL; - if (index == VFIO_PCI_CONFIG_REGION_INDEX) - return vfio_pci_config_readwrite(vdev, buf, count, ppos, false); - else if (index == VFIO_PCI_ROM_REGION_INDEX) - return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false); - else if (pci_resource_flags(pdev, index) & IORESOURCE_IO) - return vfio_pci_io_readwrite(vdev, buf, count, ppos, false); - else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) - 
return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false); + switch (index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite); + + case VFIO_PCI_ROM_REGION_INDEX: + if (iswrite) + return -EINVAL; + return vfio_pci_bar_rw(vdev, buf, count, ppos, false); + + case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: + return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite); + + case VFIO_PCI_VGA_REGION_INDEX: + return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); + } return -EINVAL; } -static ssize_t vfio_pci_write(void *device_data, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t vfio_pci_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) { - unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - struct vfio_pci_device *vdev = device_data; - struct pci_dev *pdev = vdev->pdev; + if (!count) + return 0; - if (index >= VFIO_PCI_NUM_REGIONS) - return -EINVAL; + return vfio_pci_rw(device_data, buf, count, ppos, false); +} - if (index == VFIO_PCI_CONFIG_REGION_INDEX) - return vfio_pci_config_readwrite(vdev, (char __user *)buf, - count, ppos, true); - else if (index == VFIO_PCI_ROM_REGION_INDEX) - return -EINVAL; - else if (pci_resource_flags(pdev, index) & IORESOURCE_IO) - return vfio_pci_io_readwrite(vdev, (char __user *)buf, - count, ppos, true); - else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) { - return vfio_pci_mem_readwrite(vdev, (char __user *)buf, - count, ppos, true); - } +static ssize_t vfio_pci_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (!count) + return 0; - return -EINVAL; + return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true); } static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) @@ -562,9 +586,9 @@ static int __init vfio_pci_init(void) return 0; -out_virqfd: - vfio_pci_virqfd_exit(); out_driver: + vfio_pci_virqfd_exit(); +out_virqfd: 
vfio_pci_uninit_perm_bits(); return ret; } diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 8b8f7d11e10..964ff22bf28 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -587,12 +587,46 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm) return 0; } +static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + if (offset == PCI_PM_CTRL) { + pci_power_t state; + + switch (le32_to_cpu(val) & PCI_PM_CTRL_STATE_MASK) { + case 0: + state = PCI_D0; + break; + case 1: + state = PCI_D1; + break; + case 2: + state = PCI_D2; + break; + case 3: + state = PCI_D3hot; + break; + } + + pci_set_power_state(vdev->pdev, state); + } + + return count; +} + /* Permissions for the Power Management capability */ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) { if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_PM])) return -ENOMEM; + perm->writefn = vfio_pm_config_write; + /* * We always virtualize the next field so we can remove * capabilities from the chain if we want to. @@ -600,10 +634,11 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm) p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); /* - * Power management is defined *per function*, - * so we let the user write this + * Power management is defined *per function*, so we can let + * the user change power state, but we trap and initiate the + * change ourselves, so the state bits are read-only. 
*/ - p_setd(perm, PCI_PM_CTRL, NO_VIRT, ALL_WRITE); + p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK); return 0; } @@ -985,12 +1020,12 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) if (ret) return pcibios_err_to_errno(ret); + vdev->extended_caps = true; + if ((word & PCI_EXP_FLAGS_VERS) == 1) return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; - else { - vdev->extended_caps = true; + else return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2; - } case PCI_CAP_ID_HT: ret = pci_read_config_byte(pdev, pos + 3, &byte); if (ret) @@ -1501,9 +1536,8 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, return ret; } -ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite) +ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { size_t done = 0; int ret = 0; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 611827cba8c..d7e55d03f49 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -53,6 +53,7 @@ struct vfio_pci_device { bool reset_works; bool extended_caps; bool bardirty; + bool has_vga; struct pci_saved_state *pci_saved_state; atomic_t refcnt; }; @@ -70,15 +71,15 @@ extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, unsigned index, unsigned start, unsigned count, void *data); -extern ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); -extern ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); +extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, + char __user *buf, size_t count, + loff_t 
*ppos, bool iswrite); + +extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + +extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 4362d9e7baa..210db24d220 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,253 +17,222 @@ #include <linux/pci.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/vgaarb.h> #include "vfio_pci_private.h" -/* I/O Port BAR access */ -ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +/* + * Read or write from an __iomem region (MMIO or I/O port) with an excluded + * range which is inaccessible. The excluded range drops writes and fills + * reads with -1. This is intended for handling MSI-X vector tables and + * leftover space for ROM BARs. 
+ */ +static ssize_t do_io_rw(void __iomem *io, char __user *buf, + loff_t off, size_t count, size_t x_start, + size_t x_end, bool iswrite) { - struct pci_dev *pdev = vdev->pdev; - loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - void __iomem *io; - size_t done = 0; - - if (!pci_resource_start(pdev, bar)) - return -EINVAL; - - if (pos + count > pci_resource_len(pdev, bar)) - return -EINVAL; - - if (!vdev->barmap[bar]) { - int ret; - - ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); - if (ret) - return ret; - - vdev->barmap[bar] = pci_iomap(pdev, bar, 0); - - if (!vdev->barmap[bar]) { - pci_release_selected_regions(pdev, 1 << bar); - return -EINVAL; - } - } - - io = vdev->barmap[bar]; + ssize_t done = 0; while (count) { - int filled; + size_t fillable, filled; + + if (off < x_start) + fillable = min(count, (size_t)(x_start - off)); + else if (off >= x_end) + fillable = count; + else + fillable = 0; - if (count >= 3 && !(pos % 4)) { + if (fillable >= 4 && !(off % 4)) { __le32 val; if (iswrite) { if (copy_from_user(&val, buf, 4)) return -EFAULT; - iowrite32(le32_to_cpu(val), io + pos); + iowrite32(le32_to_cpu(val), io + off); } else { - val = cpu_to_le32(ioread32(io + pos)); + val = cpu_to_le32(ioread32(io + off)); if (copy_to_user(buf, &val, 4)) return -EFAULT; } filled = 4; - - } else if ((pos % 2) == 0 && count >= 2) { + } else if (fillable >= 2 && !(off % 2)) { __le16 val; if (iswrite) { if (copy_from_user(&val, buf, 2)) return -EFAULT; - iowrite16(le16_to_cpu(val), io + pos); + iowrite16(le16_to_cpu(val), io + off); } else { - val = cpu_to_le16(ioread16(io + pos)); + val = cpu_to_le16(ioread16(io + off)); if (copy_to_user(buf, &val, 2)) return -EFAULT; } filled = 2; - } else { + } else if (fillable) { u8 val; if (iswrite) { if (copy_from_user(&val, buf, 1)) return -EFAULT; - iowrite8(val, io + pos); + iowrite8(val, io + off); } else { - val = ioread8(io + pos); + val = ioread8(io + off); if (copy_to_user(buf, 
&val, 1)) return -EFAULT; } filled = 1; + } else { + /* Fill reads with -1, drop writes */ + filled = min(count, (size_t)(x_end - off)); + if (!iswrite) { + u8 val = 0xFF; + size_t i; + + for (i = 0; i < filled; i++) + if (copy_to_user(buf + i, &val, 1)) + return -EFAULT; + } } count -= filled; done += filled; + off += filled; buf += filled; - pos += filled; } - *ppos += done; - return done; } -/* - * MMIO BAR access - * We handle two excluded ranges here as well, if the user tries to read - * the ROM beyond what PCI tells us is available or the MSI-X table region, - * we return 0xFF and writes are dropped. - */ -ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite) +ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) { struct pci_dev *pdev = vdev->pdev; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos); - void __iomem *io; + size_t x_start = 0, x_end = 0; resource_size_t end; - size_t done = 0; - size_t x_start = 0, x_end = 0; /* excluded range */ + void __iomem *io; + ssize_t done; if (!pci_resource_start(pdev, bar)) return -EINVAL; end = pci_resource_len(pdev, bar); - if (pos > end) + if (pos >= end) return -EINVAL; - if (pos == end) - return 0; - - if (pos + count > end) - count = end - pos; + count = min(count, (size_t)(end - pos)); if (bar == PCI_ROM_RESOURCE) { + /* + * The ROM can fill less space than the BAR, so we start the + * excluded range at the end of the actual ROM. This makes + * filling large ROM BARs much faster. 
+ */ io = pci_map_rom(pdev, &x_start); + if (!io) + return -ENOMEM; x_end = end; - } else { - if (!vdev->barmap[bar]) { - int ret; - - ret = pci_request_selected_regions(pdev, 1 << bar, - "vfio"); - if (ret) - return ret; + } else if (!vdev->barmap[bar]) { + int ret; - vdev->barmap[bar] = pci_iomap(pdev, bar, 0); + ret = pci_request_selected_regions(pdev, 1 << bar, "vfio"); + if (ret) + return ret; - if (!vdev->barmap[bar]) { - pci_release_selected_regions(pdev, 1 << bar); - return -EINVAL; - } + io = pci_iomap(pdev, bar, 0); + if (!io) { + pci_release_selected_regions(pdev, 1 << bar); + return -ENOMEM; } + vdev->barmap[bar] = io; + } else io = vdev->barmap[bar]; - if (bar == vdev->msix_bar) { - x_start = vdev->msix_offset; - x_end = vdev->msix_offset + vdev->msix_size; - } + if (bar == vdev->msix_bar) { + x_start = vdev->msix_offset; + x_end = vdev->msix_offset + vdev->msix_size; } - if (!io) - return -EINVAL; - - while (count) { - size_t fillable, filled; - - if (pos < x_start) - fillable = x_start - pos; - else if (pos >= x_end) - fillable = end - pos; - else - fillable = 0; - - if (fillable >= 4 && !(pos % 4) && (count >= 4)) { - __le32 val; - - if (iswrite) { - if (copy_from_user(&val, buf, 4)) - goto out; - - iowrite32(le32_to_cpu(val), io + pos); - } else { - val = cpu_to_le32(ioread32(io + pos)); + done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite); - if (copy_to_user(buf, &val, 4)) - goto out; - } + if (done >= 0) + *ppos += done; - filled = 4; - } else if (fillable >= 2 && !(pos % 2) && (count >= 2)) { - __le16 val; - - if (iswrite) { - if (copy_from_user(&val, buf, 2)) - goto out; - - iowrite16(le16_to_cpu(val), io + pos); - } else { - val = cpu_to_le16(ioread16(io + pos)); - - if (copy_to_user(buf, &val, 2)) - goto out; - } - - filled = 2; - } else if (fillable) { - u8 val; + if (bar == PCI_ROM_RESOURCE) + pci_unmap_rom(pdev, io); - if (iswrite) { - if (copy_from_user(&val, buf, 1)) - goto out; + return done; +} - iowrite8(val, io + pos); - } 
else { - val = ioread8(io + pos); +ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + int ret; + loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK; + void __iomem *iomem = NULL; + unsigned int rsrc; + bool is_ioport; + ssize_t done; + + if (!vdev->has_vga) + return -EINVAL; - if (copy_to_user(buf, &val, 1)) - goto out; - } + switch (pos) { + case 0xa0000 ... 0xbffff: + count = min(count, (size_t)(0xc0000 - pos)); + iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); + off = pos - 0xa0000; + rsrc = VGA_RSRC_LEGACY_MEM; + is_ioport = false; + break; + case 0x3b0 ... 0x3bb: + count = min(count, (size_t)(0x3bc - pos)); + iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1); + off = pos - 0x3b0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + case 0x3c0 ... 0x3df: + count = min(count, (size_t)(0x3e0 - pos)); + iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1); + off = pos - 0x3c0; + rsrc = VGA_RSRC_LEGACY_IO; + is_ioport = true; + break; + default: + return -EINVAL; + } - filled = 1; - } else { - /* Drop writes, fill reads with FF */ - if (!iswrite) { - char val = 0xFF; - size_t i; + if (!iomem) + return -ENOMEM; - for (i = 0; i < x_end - pos; i++) { - if (put_user(val, buf + i)) - goto out; - } - } + ret = vga_get_interruptible(vdev->pdev, rsrc); + if (ret) { + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); + return ret; + } - filled = x_end - pos; - } + done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite); - count -= filled; - done += filled; - buf += filled; - pos += filled; - } + vga_put(vdev->pdev, rsrc); - *ppos += done; + is_ioport ? ioport_unmap(iomem) : iounmap(iomem); -out: - if (bar == PCI_ROM_RESOURCE) - pci_unmap_rom(pdev, io); + if (done >= 0) + *ppos += done; - return count ? 
-EFAULT : done; + return done; } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 56097c6d072..fcc12f3e60a 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -139,23 +139,8 @@ EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); */ static int vfio_alloc_group_minor(struct vfio_group *group) { - int ret, minor; - -again: - if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0)) - return -ENOMEM; - /* index 0 is used by /dev/vfio/vfio */ - ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor); - if (ret == -EAGAIN) - goto again; - if (ret || minor > MINORMASK) { - if (minor > MINORMASK) - idr_remove(&vfio.group_idr, minor); - return -ENOSPC; - } - - return minor; + return idr_alloc(&vfio.group_idr, group, 1, MINORMASK + 1, GFP_KERNEL); } static void vfio_free_group_minor(int minor) @@ -191,6 +176,17 @@ static void vfio_container_put(struct vfio_container *container) kref_put(&container->kref, vfio_container_release); } +static void vfio_group_unlock_and_free(struct vfio_group *group) +{ + mutex_unlock(&vfio.group_lock); + /* + * Unregister outside of lock. A spurious callback is harmless now + * that the group is no longer in vfio.group_list. 
+ */ + iommu_group_unregister_notifier(group->iommu_group, &group->nb); + kfree(group); +} + /** * Group objects - create, release, get, put, search */ @@ -229,8 +225,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) minor = vfio_alloc_group_minor(group); if (minor < 0) { - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return ERR_PTR(minor); } @@ -239,8 +234,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) if (tmp->iommu_group == iommu_group) { vfio_group_get(tmp); vfio_free_group_minor(minor); - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return tmp; } } @@ -249,8 +243,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) group, "%d", iommu_group_id(iommu_group)); if (IS_ERR(dev)) { vfio_free_group_minor(minor); - mutex_unlock(&vfio.group_lock); - kfree(group); + vfio_group_unlock_and_free(group); return (struct vfio_group *)dev; /* ERR_PTR */ } @@ -274,16 +267,7 @@ static void vfio_group_release(struct kref *kref) device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor)); list_del(&group->vfio_next); vfio_free_group_minor(group->minor); - - mutex_unlock(&vfio.group_lock); - - /* - * Unregister outside of lock. A spurious callback is harmless now - * that the group is no longer in vfio.group_list. - */ - iommu_group_unregister_notifier(group->iommu_group, &group->nb); - - kfree(group); + vfio_group_unlock_and_free(group); } static void vfio_group_put(struct vfio_group *group) @@ -443,7 +427,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, * a device. It's not always practical to leave a device within a group * driverless as it could get re-bound to something unsafe. 
*/ -static const char * const vfio_driver_whitelist[] = { "pci-stub" }; +static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" }; static bool vfio_whitelisted_driver(struct device_driver *drv) { @@ -466,8 +450,9 @@ static int vfio_dev_viable(struct device *dev, void *data) { struct vfio_group *group = data; struct vfio_device *device; + struct device_driver *drv = ACCESS_ONCE(dev->driver); - if (!dev->driver || vfio_whitelisted_driver(dev->driver)) + if (!drv || vfio_whitelisted_driver(drv)) return 0; device = vfio_group_get_device(group, dev); @@ -642,33 +627,16 @@ int vfio_add_group_dev(struct device *dev, } EXPORT_SYMBOL_GPL(vfio_add_group_dev); -/* Test whether a struct device is present in our tracking */ -static bool vfio_dev_present(struct device *dev) +/* Given a referenced group, check if it contains the device */ +static bool vfio_dev_present(struct vfio_group *group, struct device *dev) { - struct iommu_group *iommu_group; - struct vfio_group *group; struct vfio_device *device; - iommu_group = iommu_group_get(dev); - if (!iommu_group) - return false; - - group = vfio_group_get_from_iommu(iommu_group); - if (!group) { - iommu_group_put(iommu_group); - return false; - } - device = vfio_group_get_device(group, dev); - if (!device) { - vfio_group_put(group); - iommu_group_put(iommu_group); + if (!device) return false; - } vfio_device_put(device); - vfio_group_put(group); - iommu_group_put(iommu_group); return true; } @@ -682,10 +650,18 @@ void *vfio_del_group_dev(struct device *dev) struct iommu_group *iommu_group = group->iommu_group; void *device_data = device->device_data; + /* + * The group exists so long as we have a device reference. Get + * a group reference and use it to scan for the device going away. 
+ */ + vfio_group_get(group); + vfio_device_put(device); /* TODO send a signal to encourage this to be released */ - wait_event(vfio.release_q, !vfio_dev_present(dev)); + wait_event(vfio.release_q, !vfio_dev_present(group, dev)); + + vfio_group_put(group); iommu_group_put(iommu_group); |