summaryrefslogtreecommitdiffstats
path: root/drivers/vfio
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/pci/Kconfig10
-rw-r--r--drivers/vfio/pci/vfio_pci.c156
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c52
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h19
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c281
-rw-r--r--drivers/vfio/vfio.c86
6 files changed, 309 insertions, 295 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 5980758563e..c41b01e2b69 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -6,3 +6,13 @@ config VFIO_PCI
use of PCI drivers using the VFIO framework.
If you don't know what to do here, say N.
+
+config VFIO_PCI_VGA
+ bool "VFIO PCI support for VGA devices"
+ depends on VFIO_PCI && X86 && VGA_ARB
+ help
+ Support for VGA extension to VFIO PCI. This exposes an additional
+ region on VGA devices for accessing legacy VGA addresses used by
+ BIOS and generic video drivers.
+
+ If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 6c119944bbb..8189cb6a86a 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -43,6 +43,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
u16 cmd;
u8 msix_pos;
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+
vdev->reset_works = (pci_reset_function(pdev) == 0);
pci_save_state(pdev);
vdev->pci_saved_state = pci_store_saved_state(pdev);
@@ -51,8 +55,11 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
__func__, dev_name(&pdev->dev));
ret = vfio_config_init(vdev);
- if (ret)
- goto out;
+ if (ret) {
+ pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state);
+ pci_disable_device(pdev);
+ return ret;
+ }
if (likely(!nointxmask))
vdev->pci_2_3 = pci_intx_mask_supported(pdev);
@@ -77,24 +84,20 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
} else
vdev->msix_bar = 0xFF;
- ret = pci_enable_device(pdev);
- if (ret)
- goto out;
-
- return ret;
+#ifdef CONFIG_VFIO_PCI_VGA
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ vdev->has_vga = true;
+#endif
-out:
- kfree(vdev->pci_saved_state);
- vdev->pci_saved_state = NULL;
- vfio_config_free(vdev);
- return ret;
+ return 0;
}
static void vfio_pci_disable(struct vfio_pci_device *vdev)
{
+ struct pci_dev *pdev = vdev->pdev;
int bar;
- pci_disable_device(vdev->pdev);
+ pci_disable_device(pdev);
vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
VFIO_IRQ_SET_ACTION_TRIGGER,
@@ -104,22 +107,40 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
vfio_config_free(vdev);
- pci_reset_function(vdev->pdev);
-
- if (pci_load_and_free_saved_state(vdev->pdev,
- &vdev->pci_saved_state) == 0)
- pci_restore_state(vdev->pdev);
- else
- pr_info("%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&vdev->pdev->dev));
-
for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
if (!vdev->barmap[bar])
continue;
- pci_iounmap(vdev->pdev, vdev->barmap[bar]);
- pci_release_selected_regions(vdev->pdev, 1 << bar);
+ pci_iounmap(pdev, vdev->barmap[bar]);
+ pci_release_selected_regions(pdev, 1 << bar);
vdev->barmap[bar] = NULL;
}
+
+ /*
+ * If we have saved state, restore it. If we can reset the device,
+ * even better. Resetting with current state seems better than
+ * nothing, but saving and restoring current state without reset
+ * is just busy work.
+ */
+ if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
+ pr_info("%s: Couldn't reload %s saved state\n",
+ __func__, dev_name(&pdev->dev));
+
+ if (!vdev->reset_works)
+ return;
+
+ pci_save_state(pdev);
+ }
+
+ /*
+ * Disable INTx and MSI, presumably to avoid spurious interrupts
+ * during reset. Stolen from pci_reset_function()
+ */
+ pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+
+ if (vdev->reset_works)
+ __pci_reset_function(pdev);
+
+ pci_restore_state(pdev);
}
static void vfio_pci_release(void *device_data)
@@ -269,6 +290,16 @@ static long vfio_pci_ioctl(void *device_data,
info.flags = VFIO_REGION_INFO_FLAG_READ;
break;
}
+ case VFIO_PCI_VGA_REGION_INDEX:
+ if (!vdev->has_vga)
+ return -EINVAL;
+
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = 0xc0000;
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
+
+ break;
default:
return -EINVAL;
}
@@ -327,15 +358,10 @@ static long vfio_pci_ioctl(void *device_data,
hdr.count > vfio_pci_get_irq_count(vdev, hdr.index))
return -EINVAL;
- data = kmalloc(hdr.count * size, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- if (copy_from_user(data, (void __user *)(arg + minsz),
- hdr.count * size)) {
- kfree(data);
- return -EFAULT;
- }
+ data = memdup_user((void __user *)(arg + minsz),
+ hdr.count * size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
}
mutex_lock(&vdev->igate);
@@ -355,52 +381,50 @@ static long vfio_pci_ioctl(void *device_data,
return -ENOTTY;
}
-static ssize_t vfio_pci_read(void *device_data, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite)
{
unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
struct vfio_pci_device *vdev = device_data;
- struct pci_dev *pdev = vdev->pdev;
if (index >= VFIO_PCI_NUM_REGIONS)
return -EINVAL;
- if (index == VFIO_PCI_CONFIG_REGION_INDEX)
- return vfio_pci_config_readwrite(vdev, buf, count, ppos, false);
- else if (index == VFIO_PCI_ROM_REGION_INDEX)
- return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false);
- else if (pci_resource_flags(pdev, index) & IORESOURCE_IO)
- return vfio_pci_io_readwrite(vdev, buf, count, ppos, false);
- else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM)
- return vfio_pci_mem_readwrite(vdev, buf, count, ppos, false);
+ switch (index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+ return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
+
+ case VFIO_PCI_ROM_REGION_INDEX:
+ if (iswrite)
+ return -EINVAL;
+ return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
+
+ case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+ return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
+
+ case VFIO_PCI_VGA_REGION_INDEX:
+ return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
+ }
return -EINVAL;
}
-static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t vfio_pci_read(void *device_data, char __user *buf,
+ size_t count, loff_t *ppos)
{
- unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
- struct vfio_pci_device *vdev = device_data;
- struct pci_dev *pdev = vdev->pdev;
+ if (!count)
+ return 0;
- if (index >= VFIO_PCI_NUM_REGIONS)
- return -EINVAL;
+ return vfio_pci_rw(device_data, buf, count, ppos, false);
+}
- if (index == VFIO_PCI_CONFIG_REGION_INDEX)
- return vfio_pci_config_readwrite(vdev, (char __user *)buf,
- count, ppos, true);
- else if (index == VFIO_PCI_ROM_REGION_INDEX)
- return -EINVAL;
- else if (pci_resource_flags(pdev, index) & IORESOURCE_IO)
- return vfio_pci_io_readwrite(vdev, (char __user *)buf,
- count, ppos, true);
- else if (pci_resource_flags(pdev, index) & IORESOURCE_MEM) {
- return vfio_pci_mem_readwrite(vdev, (char __user *)buf,
- count, ppos, true);
- }
+static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ if (!count)
+ return 0;
- return -EINVAL;
+ return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
}
static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
@@ -562,9 +586,9 @@ static int __init vfio_pci_init(void)
return 0;
-out_virqfd:
- vfio_pci_virqfd_exit();
out_driver:
+ vfio_pci_virqfd_exit();
+out_virqfd:
vfio_pci_uninit_perm_bits();
return ret;
}
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 8b8f7d11e10..964ff22bf28 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -587,12 +587,46 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
return 0;
}
+static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos,
+ int count, struct perm_bits *perm,
+ int offset, __le32 val)
+{
+ count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
+ if (count < 0)
+ return count;
+
+ if (offset == PCI_PM_CTRL) {
+ pci_power_t state;
+
+ switch (le32_to_cpu(val) & PCI_PM_CTRL_STATE_MASK) {
+ case 0:
+ state = PCI_D0;
+ break;
+ case 1:
+ state = PCI_D1;
+ break;
+ case 2:
+ state = PCI_D2;
+ break;
+ case 3:
+ state = PCI_D3hot;
+ break;
+ }
+
+ pci_set_power_state(vdev->pdev, state);
+ }
+
+ return count;
+}
+
/* Permissions for the Power Management capability */
static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
{
if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_PM]))
return -ENOMEM;
+ perm->writefn = vfio_pm_config_write;
+
/*
* We always virtualize the next field so we can remove
* capabilities from the chain if we want to.
@@ -600,10 +634,11 @@ static int __init init_pci_cap_pm_perm(struct perm_bits *perm)
p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
/*
- * Power management is defined *per function*,
- * so we let the user write this
+ * Power management is defined *per function*, so we can let
+ * the user change power state, but we trap and initiate the
+ * change ourselves, so the state bits are read-only.
*/
- p_setd(perm, PCI_PM_CTRL, NO_VIRT, ALL_WRITE);
+ p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK);
return 0;
}
@@ -985,12 +1020,12 @@ static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
if (ret)
return pcibios_err_to_errno(ret);
+ vdev->extended_caps = true;
+
if ((word & PCI_EXP_FLAGS_VERS) == 1)
return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1;
- else {
- vdev->extended_caps = true;
+ else
return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2;
- }
case PCI_CAP_ID_HT:
ret = pci_read_config_byte(pdev, pos + 3, &byte);
if (ret)
@@ -1501,9 +1536,8 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
return ret;
}
-ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev,
- char __user *buf, size_t count,
- loff_t *ppos, bool iswrite)
+ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite)
{
size_t done = 0;
int ret = 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 611827cba8c..d7e55d03f49 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -53,6 +53,7 @@ struct vfio_pci_device {
bool reset_works;
bool extended_caps;
bool bardirty;
+ bool has_vga;
struct pci_saved_state *pci_saved_state;
atomic_t refcnt;
};
@@ -70,15 +71,15 @@ extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
uint32_t flags, unsigned index,
unsigned start, unsigned count, void *data);
-extern ssize_t vfio_pci_config_readwrite(struct vfio_pci_device *vdev,
- char __user *buf, size_t count,
- loff_t *ppos, bool iswrite);
-extern ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev,
- char __user *buf, size_t count,
- loff_t *ppos, bool iswrite);
-extern ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev,
- char __user *buf, size_t count,
- loff_t *ppos, bool iswrite);
+extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
+ char __user *buf, size_t count,
+ loff_t *ppos, bool iswrite);
+
+extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
+
+extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
extern int vfio_pci_init_perm_bits(void);
extern void vfio_pci_uninit_perm_bits(void);
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 4362d9e7baa..210db24d220 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -17,253 +17,222 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/vgaarb.h>
#include "vfio_pci_private.h"
-/* I/O Port BAR access */
-ssize_t vfio_pci_io_readwrite(struct vfio_pci_device *vdev, char __user *buf,
- size_t count, loff_t *ppos, bool iswrite)
+/*
+ * Read or write from an __iomem region (MMIO or I/O port) with an excluded
+ * range which is inaccessible. The excluded range drops writes and fills
+ * reads with -1. This is intended for handling MSI-X vector tables and
+ * leftover space for ROM BARs.
+ */
+static ssize_t do_io_rw(void __iomem *io, char __user *buf,
+ loff_t off, size_t count, size_t x_start,
+ size_t x_end, bool iswrite)
{
- struct pci_dev *pdev = vdev->pdev;
- loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
- int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
- void __iomem *io;
- size_t done = 0;
-
- if (!pci_resource_start(pdev, bar))
- return -EINVAL;
-
- if (pos + count > pci_resource_len(pdev, bar))
- return -EINVAL;
-
- if (!vdev->barmap[bar]) {
- int ret;
-
- ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
- if (ret)
- return ret;
-
- vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
-
- if (!vdev->barmap[bar]) {
- pci_release_selected_regions(pdev, 1 << bar);
- return -EINVAL;
- }
- }
-
- io = vdev->barmap[bar];
+ ssize_t done = 0;
while (count) {
- int filled;
+ size_t fillable, filled;
+
+ if (off < x_start)
+ fillable = min(count, (size_t)(x_start - off));
+ else if (off >= x_end)
+ fillable = count;
+ else
+ fillable = 0;
- if (count >= 3 && !(pos % 4)) {
+ if (fillable >= 4 && !(off % 4)) {
__le32 val;
if (iswrite) {
if (copy_from_user(&val, buf, 4))
return -EFAULT;
- iowrite32(le32_to_cpu(val), io + pos);
+ iowrite32(le32_to_cpu(val), io + off);
} else {
- val = cpu_to_le32(ioread32(io + pos));
+ val = cpu_to_le32(ioread32(io + off));
if (copy_to_user(buf, &val, 4))
return -EFAULT;
}
filled = 4;
-
- } else if ((pos % 2) == 0 && count >= 2) {
+ } else if (fillable >= 2 && !(off % 2)) {
__le16 val;
if (iswrite) {
if (copy_from_user(&val, buf, 2))
return -EFAULT;
- iowrite16(le16_to_cpu(val), io + pos);
+ iowrite16(le16_to_cpu(val), io + off);
} else {
- val = cpu_to_le16(ioread16(io + pos));
+ val = cpu_to_le16(ioread16(io + off));
if (copy_to_user(buf, &val, 2))
return -EFAULT;
}
filled = 2;
- } else {
+ } else if (fillable) {
u8 val;
if (iswrite) {
if (copy_from_user(&val, buf, 1))
return -EFAULT;
- iowrite8(val, io + pos);
+ iowrite8(val, io + off);
} else {
- val = ioread8(io + pos);
+ val = ioread8(io + off);
if (copy_to_user(buf, &val, 1))
return -EFAULT;
}
filled = 1;
+ } else {
+ /* Fill reads with -1, drop writes */
+ filled = min(count, (size_t)(x_end - off));
+ if (!iswrite) {
+ u8 val = 0xFF;
+ size_t i;
+
+ for (i = 0; i < filled; i++)
+ if (copy_to_user(buf + i, &val, 1))
+ return -EFAULT;
+ }
}
count -= filled;
done += filled;
+ off += filled;
buf += filled;
- pos += filled;
}
- *ppos += done;
-
return done;
}
-/*
- * MMIO BAR access
- * We handle two excluded ranges here as well, if the user tries to read
- * the ROM beyond what PCI tells us is available or the MSI-X table region,
- * we return 0xFF and writes are dropped.
- */
-ssize_t vfio_pci_mem_readwrite(struct vfio_pci_device *vdev, char __user *buf,
- size_t count, loff_t *ppos, bool iswrite)
+ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite)
{
struct pci_dev *pdev = vdev->pdev;
loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
- void __iomem *io;
+ size_t x_start = 0, x_end = 0;
resource_size_t end;
- size_t done = 0;
- size_t x_start = 0, x_end = 0; /* excluded range */
+ void __iomem *io;
+ ssize_t done;
if (!pci_resource_start(pdev, bar))
return -EINVAL;
end = pci_resource_len(pdev, bar);
- if (pos > end)
+ if (pos >= end)
return -EINVAL;
- if (pos == end)
- return 0;
-
- if (pos + count > end)
- count = end - pos;
+ count = min(count, (size_t)(end - pos));
if (bar == PCI_ROM_RESOURCE) {
+ /*
+ * The ROM can fill less space than the BAR, so we start the
+ * excluded range at the end of the actual ROM. This makes
+ * filling large ROM BARs much faster.
+ */
io = pci_map_rom(pdev, &x_start);
+ if (!io)
+ return -ENOMEM;
x_end = end;
- } else {
- if (!vdev->barmap[bar]) {
- int ret;
-
- ret = pci_request_selected_regions(pdev, 1 << bar,
- "vfio");
- if (ret)
- return ret;
+ } else if (!vdev->barmap[bar]) {
+ int ret;
- vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
+ ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
+ if (ret)
+ return ret;
- if (!vdev->barmap[bar]) {
- pci_release_selected_regions(pdev, 1 << bar);
- return -EINVAL;
- }
+ io = pci_iomap(pdev, bar, 0);
+ if (!io) {
+ pci_release_selected_regions(pdev, 1 << bar);
+ return -ENOMEM;
}
+ vdev->barmap[bar] = io;
+ } else
io = vdev->barmap[bar];
- if (bar == vdev->msix_bar) {
- x_start = vdev->msix_offset;
- x_end = vdev->msix_offset + vdev->msix_size;
- }
+ if (bar == vdev->msix_bar) {
+ x_start = vdev->msix_offset;
+ x_end = vdev->msix_offset + vdev->msix_size;
}
- if (!io)
- return -EINVAL;
-
- while (count) {
- size_t fillable, filled;
-
- if (pos < x_start)
- fillable = x_start - pos;
- else if (pos >= x_end)
- fillable = end - pos;
- else
- fillable = 0;
-
- if (fillable >= 4 && !(pos % 4) && (count >= 4)) {
- __le32 val;
-
- if (iswrite) {
- if (copy_from_user(&val, buf, 4))
- goto out;
-
- iowrite32(le32_to_cpu(val), io + pos);
- } else {
- val = cpu_to_le32(ioread32(io + pos));
+ done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
- if (copy_to_user(buf, &val, 4))
- goto out;
- }
+ if (done >= 0)
+ *ppos += done;
- filled = 4;
- } else if (fillable >= 2 && !(pos % 2) && (count >= 2)) {
- __le16 val;
-
- if (iswrite) {
- if (copy_from_user(&val, buf, 2))
- goto out;
-
- iowrite16(le16_to_cpu(val), io + pos);
- } else {
- val = cpu_to_le16(ioread16(io + pos));
-
- if (copy_to_user(buf, &val, 2))
- goto out;
- }
-
- filled = 2;
- } else if (fillable) {
- u8 val;
+ if (bar == PCI_ROM_RESOURCE)
+ pci_unmap_rom(pdev, io);
- if (iswrite) {
- if (copy_from_user(&val, buf, 1))
- goto out;
+ return done;
+}
- iowrite8(val, io + pos);
- } else {
- val = ioread8(io + pos);
+ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite)
+{
+ int ret;
+ loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ void __iomem *iomem = NULL;
+ unsigned int rsrc;
+ bool is_ioport;
+ ssize_t done;
+
+ if (!vdev->has_vga)
+ return -EINVAL;
- if (copy_to_user(buf, &val, 1))
- goto out;
- }
+ switch (pos) {
+ case 0xa0000 ... 0xbffff:
+ count = min(count, (size_t)(0xc0000 - pos));
+ iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1);
+ off = pos - 0xa0000;
+ rsrc = VGA_RSRC_LEGACY_MEM;
+ is_ioport = false;
+ break;
+ case 0x3b0 ... 0x3bb:
+ count = min(count, (size_t)(0x3bc - pos));
+ iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
+ off = pos - 0x3b0;
+ rsrc = VGA_RSRC_LEGACY_IO;
+ is_ioport = true;
+ break;
+ case 0x3c0 ... 0x3df:
+ count = min(count, (size_t)(0x3e0 - pos));
+ iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
+ off = pos - 0x3c0;
+ rsrc = VGA_RSRC_LEGACY_IO;
+ is_ioport = true;
+ break;
+ default:
+ return -EINVAL;
+ }
- filled = 1;
- } else {
- /* Drop writes, fill reads with FF */
- if (!iswrite) {
- char val = 0xFF;
- size_t i;
+ if (!iomem)
+ return -ENOMEM;
- for (i = 0; i < x_end - pos; i++) {
- if (put_user(val, buf + i))
- goto out;
- }
- }
+ ret = vga_get_interruptible(vdev->pdev, rsrc);
+ if (ret) {
+ is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
+ return ret;
+ }
- filled = x_end - pos;
- }
+ done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
- count -= filled;
- done += filled;
- buf += filled;
- pos += filled;
- }
+ vga_put(vdev->pdev, rsrc);
- *ppos += done;
+ is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
-out:
- if (bar == PCI_ROM_RESOURCE)
- pci_unmap_rom(pdev, io);
+ if (done >= 0)
+ *ppos += done;
- return count ? -EFAULT : done;
+ return done;
}
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 56097c6d072..fcc12f3e60a 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -139,23 +139,8 @@ EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
*/
static int vfio_alloc_group_minor(struct vfio_group *group)
{
- int ret, minor;
-
-again:
- if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0))
- return -ENOMEM;
-
/* index 0 is used by /dev/vfio/vfio */
- ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor);
- if (ret == -EAGAIN)
- goto again;
- if (ret || minor > MINORMASK) {
- if (minor > MINORMASK)
- idr_remove(&vfio.group_idr, minor);
- return -ENOSPC;
- }
-
- return minor;
+ return idr_alloc(&vfio.group_idr, group, 1, MINORMASK + 1, GFP_KERNEL);
}
static void vfio_free_group_minor(int minor)
@@ -191,6 +176,17 @@ static void vfio_container_put(struct vfio_container *container)
kref_put(&container->kref, vfio_container_release);
}
+static void vfio_group_unlock_and_free(struct vfio_group *group)
+{
+ mutex_unlock(&vfio.group_lock);
+ /*
+ * Unregister outside of lock. A spurious callback is harmless now
+ * that the group is no longer in vfio.group_list.
+ */
+ iommu_group_unregister_notifier(group->iommu_group, &group->nb);
+ kfree(group);
+}
+
/**
* Group objects - create, release, get, put, search
*/
@@ -229,8 +225,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
minor = vfio_alloc_group_minor(group);
if (minor < 0) {
- mutex_unlock(&vfio.group_lock);
- kfree(group);
+ vfio_group_unlock_and_free(group);
return ERR_PTR(minor);
}
@@ -239,8 +234,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
if (tmp->iommu_group == iommu_group) {
vfio_group_get(tmp);
vfio_free_group_minor(minor);
- mutex_unlock(&vfio.group_lock);
- kfree(group);
+ vfio_group_unlock_and_free(group);
return tmp;
}
}
@@ -249,8 +243,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
group, "%d", iommu_group_id(iommu_group));
if (IS_ERR(dev)) {
vfio_free_group_minor(minor);
- mutex_unlock(&vfio.group_lock);
- kfree(group);
+ vfio_group_unlock_and_free(group);
return (struct vfio_group *)dev; /* ERR_PTR */
}
@@ -274,16 +267,7 @@ static void vfio_group_release(struct kref *kref)
device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor));
list_del(&group->vfio_next);
vfio_free_group_minor(group->minor);
-
- mutex_unlock(&vfio.group_lock);
-
- /*
- * Unregister outside of lock. A spurious callback is harmless now
- * that the group is no longer in vfio.group_list.
- */
- iommu_group_unregister_notifier(group->iommu_group, &group->nb);
-
- kfree(group);
+ vfio_group_unlock_and_free(group);
}
static void vfio_group_put(struct vfio_group *group)
@@ -443,7 +427,7 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
* a device. It's not always practical to leave a device within a group
* driverless as it could get re-bound to something unsafe.
*/
-static const char * const vfio_driver_whitelist[] = { "pci-stub" };
+static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" };
static bool vfio_whitelisted_driver(struct device_driver *drv)
{
@@ -466,8 +450,9 @@ static int vfio_dev_viable(struct device *dev, void *data)
{
struct vfio_group *group = data;
struct vfio_device *device;
+ struct device_driver *drv = ACCESS_ONCE(dev->driver);
- if (!dev->driver || vfio_whitelisted_driver(dev->driver))
+ if (!drv || vfio_whitelisted_driver(drv))
return 0;
device = vfio_group_get_device(group, dev);
@@ -642,33 +627,16 @@ int vfio_add_group_dev(struct device *dev,
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
-/* Test whether a struct device is present in our tracking */
-static bool vfio_dev_present(struct device *dev)
+/* Given a referenced group, check if it contains the device */
+static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
- struct iommu_group *iommu_group;
- struct vfio_group *group;
struct vfio_device *device;
- iommu_group = iommu_group_get(dev);
- if (!iommu_group)
- return false;
-
- group = vfio_group_get_from_iommu(iommu_group);
- if (!group) {
- iommu_group_put(iommu_group);
- return false;
- }
-
device = vfio_group_get_device(group, dev);
- if (!device) {
- vfio_group_put(group);
- iommu_group_put(iommu_group);
+ if (!device)
return false;
- }
vfio_device_put(device);
- vfio_group_put(group);
- iommu_group_put(iommu_group);
return true;
}
@@ -682,10 +650,18 @@ void *vfio_del_group_dev(struct device *dev)
struct iommu_group *iommu_group = group->iommu_group;
void *device_data = device->device_data;
+ /*
+ * The group exists so long as we have a device reference. Get
+ * a group reference and use it to scan for the device going away.
+ */
+ vfio_group_get(group);
+
vfio_device_put(device);
/* TODO send a signal to encourage this to be released */
- wait_event(vfio.release_q, !vfio_dev_present(dev));
+ wait_event(vfio.release_q, !vfio_dev_present(group, dev));
+
+ vfio_group_put(group);
iommu_group_put(iommu_group);