From 54cfe08b5f352d9aa4979e2434e85907c84af07a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 14 Dec 2014 14:54:17 +0200 Subject: mn10300: drop dead code pci-iomap.c was (apparently, mistakenly) reintroduced as part of commit 83c2dc15ce824450e7044b9f90cd529c25747ae0 MN10300: Handle cacheable PCI regions in pci_iomap() probably as side-effect of forward-porting the patch from an old kernel. It's not really needed: the generic pci_iomap does the right thing here. The new file isn't compiled so it's safe to drop. Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: trivial@kernel.org Cc: David Howells Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- arch/mn10300/unit-asb2305/pci-iomap.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 arch/mn10300/unit-asb2305/pci-iomap.c (limited to 'arch') diff --git a/arch/mn10300/unit-asb2305/pci-iomap.c b/arch/mn10300/unit-asb2305/pci-iomap.c deleted file mode 100644 index bd65dae17f3..00000000000 --- a/arch/mn10300/unit-asb2305/pci-iomap.c +++ /dev/null @@ -1,35 +0,0 @@ -/* ASB2305 PCI I/O mapping handler - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ -#include -#include - -/* - * Create a virtual mapping cookie for a PCI BAR (memory or IO) - */ -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (!len || !start) - return NULL; - - if ((flags & IORESOURCE_IO) || (flags & IORESOURCE_MEM)) { - if (flags & IORESOURCE_CACHEABLE && !(flags & IORESOURCE_IO)) - return ioremap(start, len); - else - return ioremap_nocache(start, len); - } - - return NULL; -} -EXPORT_SYMBOL(pci_iomap); -- cgit v1.2.3-70-g09d2 From 8cfc99b58366ea9f391fe0da7d16940ca6a1d9c0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 29 May 2013 11:52:21 +0930 Subject: s390: add pci_iomap_range Virtio drivers should map the part of the range they need, not necessarily all of it. To this end, support mapping ranges within BAR on s390. Since multiple ranges can now be mapped within a BAR, we keep track of the number of mappings created, and only clear out the mapping for a BAR when this number reaches 0. Cc: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Tested-by: Sebastian Ott Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- arch/s390/include/asm/pci_io.h | 1 + arch/s390/pci/pci.c | 34 +++++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index f664e96f48c..1a9a98de5bd 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -16,6 +16,7 @@ struct zpci_iomap_entry { u32 fh; u8 bar; + u16 count; }; extern struct zpci_iomap_entry *zpci_iomap_start; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 3290f11ae1d..753a5673195 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -259,7 +259,10 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) } /* Create a virtual mapping cookie for a PCI BAR */ -void __iomem *pci_iomap(struct pci_dev *pdev, int bar, unsigned long max) +void __iomem *pci_iomap_range(struct pci_dev *pdev, + int bar, + unsigned long offset, + unsigned long max) { struct zpci_dev *zdev = get_zdev(pdev); u64 addr; @@ -270,14 +273,27 @@ void __iomem *pci_iomap(struct pci_dev *pdev, int bar, unsigned long max) idx = zdev->bars[bar].map_idx; spin_lock(&zpci_iomap_lock); - zpci_iomap_start[idx].fh = zdev->fh; - zpci_iomap_start[idx].bar = bar; + if (zpci_iomap_start[idx].count++) { + BUG_ON(zpci_iomap_start[idx].fh != zdev->fh || + zpci_iomap_start[idx].bar != bar); + } else { + zpci_iomap_start[idx].fh = zdev->fh; + zpci_iomap_start[idx].bar = bar; + } + /* Detect overrun */ + BUG_ON(!zpci_iomap_start[idx].count); spin_unlock(&zpci_iomap_lock); addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48); - return (void __iomem *) addr; + return (void __iomem *) addr + offset; } -EXPORT_SYMBOL_GPL(pci_iomap); +EXPORT_SYMBOL_GPL(pci_iomap_range); + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + return pci_iomap_range(dev, bar, 0, maxlen); +} +EXPORT_SYMBOL(pci_iomap); void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) { @@ -285,8 +301,12 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48; spin_lock(&zpci_iomap_lock); - zpci_iomap_start[idx].fh = 0; - zpci_iomap_start[idx].bar = 0; + /* Detect underrun */ + BUG_ON(!zpci_iomap_start[idx].count); + if (!--zpci_iomap_start[idx].count) { + zpci_iomap_start[idx].fh = 0; + zpci_iomap_start[idx].bar = 0; + } spin_unlock(&zpci_iomap_lock); } EXPORT_SYMBOL_GPL(pci_iounmap); -- cgit v1.2.3-70-g09d2 From d1c29465b8a52d8fc5a59aac92c6b206b69fe631 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 11 Feb 2015 15:15:10 +1030 Subject: lguest: don't disable iospace. This no longer speeds up boot (IDE got better, I guess), but it does stop us probing for a PCI bus. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index c1c1544b848..47ec7f201d2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1399,14 +1399,6 @@ __init void lguest_init(void) /* Hook in our special panic hypercall code. */ atomic_notifier_chain_register(&panic_notifier_list, &paniced); - /* - * The IDE code spends about 3 seconds probing for disks: if we reserve - * all the I/O ports up front it can't get them and so doesn't probe. - * Other device drivers are similar (but less severe). This cuts the - * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. - */ - paravirt_disable_iospace(); - /* * This is messy CPU setup stuff which the native boot code does before * start_kernel, so we have to do, too: -- cgit v1.2.3-70-g09d2 From ee72576c143d8e9081ae1fe8644122454dd323c5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 11 Feb 2015 15:15:10 +1030 Subject: lguest: disable ACPI explicitly. Once we add PCI, it starts trying to manage our interrupts. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 47ec7f201d2..aa6e3b4ce29 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -1428,6 +1429,9 @@ __init void lguest_init(void) /* Register our very early console. */ virtio_cons_early_init(early_put_chars); + /* Don't let ACPI try to control our PCI interrupts. */ + disable_acpi(); + /* * Last of all, we set the power management poweroff hook to point to * the Guest routine to power off, and the reboot hook to our restart -- cgit v1.2.3-70-g09d2 From e1b83e27881cf3153ce420aea853797fed29a9ea Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 11 Feb 2015 15:15:10 +1030 Subject: lguest: Override pcibios_enable_irq/pcibios_disable_irq to our stupid PIC This lets us deliver interrupts for our emulated PCI devices using our dumb PIC, and not emulate an 8259 and PCI irq mapping tables or whatever. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index aa6e3b4ce29..2943ab93167 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,7 @@ #include #include /* for struct machine_ops */ #include +#include /*G:010 * Welcome to the Guest! @@ -832,6 +834,24 @@ static struct irq_chip lguest_irq_controller = { .irq_unmask = enable_lguest_irq, }; +static int lguest_enable_irq(struct pci_dev *dev) +{ + u8 line = 0; + + /* We literally use the PCI interrupt line as the irq number. */ + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line); + irq_set_chip_and_handler_name(line, &lguest_irq_controller, + handle_level_irq, "level"); + dev->irq = line; + return 0; +} + +/* We don't do hotplug PCI, so this shouldn't be called. */ +static void lguest_disable_irq(struct pci_dev *dev) +{ + WARN_ON(1); +} + /* * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware * interrupt (except 128, which is used for system calls), and then tells the @@ -1432,6 +1452,10 @@ __init void lguest_init(void) /* Don't let ACPI try to control our PCI interrupts. */ disable_acpi(); + /* We control them ourselves, by overriding these two hooks. */ + pcibios_enable_irq = lguest_enable_irq; + pcibios_disable_irq = lguest_disable_irq; + /* * Last of all, we set the power management poweroff hook to point to * the Guest routine to power off, and the reboot hook to our restart -- cgit v1.2.3-70-g09d2 From a561adfaecc9eb6fb66941b450458801f3f60ca0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 11 Feb 2015 15:26:01 +1030 Subject: lguest: use the PCI console device's emerg_wr for early boot messages. This involves manually checking the console device (which is always in slot 1 of bus 0) and using the window in VIRTIO_PCI_CAP_PCI_CFG to program it (as we can't map the BAR yet). We could in fact do this much earlier, but we wait for the first write from the virtio_cons_early_init() facility. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 146 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 134 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 2943ab93167..531b844cb48 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ #include /* for struct machine_ops */ #include #include +#include /*G:010 * Welcome to the Guest! @@ -1202,25 +1204,145 @@ static __init char *lguest_memory_setup(void) return "LGUEST"; } +/* Offset within PCI config space of BAR access capability. */ +static int console_cfg_offset = 0; +static int console_access_cap; + +/* Set up so that we access off in bar0 (on bus 0, device 1, function 0) */ +static void set_cfg_window(u32 cfg_offset, u32 off) +{ + write_pci_config_byte(0, 1, 0, + cfg_offset + offsetof(struct virtio_pci_cap, bar), + 0); + write_pci_config(0, 1, 0, + cfg_offset + offsetof(struct virtio_pci_cap, length), + 4); + write_pci_config(0, 1, 0, + cfg_offset + offsetof(struct virtio_pci_cap, offset), + off); +} + +static u32 read_bar_via_cfg(u32 cfg_offset, u32 off) +{ + set_cfg_window(cfg_offset, off); + return read_pci_config(0, 1, 0, + cfg_offset + sizeof(struct virtio_pci_cap)); +} + +static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val) +{ + set_cfg_window(cfg_offset, off); + write_pci_config(0, 1, 0, + cfg_offset + sizeof(struct virtio_pci_cap), val); +} + +static void probe_pci_console(void) +{ + u8 cap, common_cap = 0, device_cap = 0; + /* Offsets within BAR0 */ + u32 common_offset, device_offset; + + /* Avoid recursive printk into here. */ + console_cfg_offset = -1; + + if (!early_pci_allowed()) { + printk(KERN_ERR "lguest: early PCI access not allowed!\n"); + return; + } + + /* We expect a console PCI device at BUS0, slot 1. */ + if (read_pci_config(0, 1, 0, 0) != 0x10431AF4) { + printk(KERN_ERR "lguest: PCI device is %#x!\n", + read_pci_config(0, 1, 0, 0)); + return; + } + + /* Find the capabilities we need (must be in bar0) */ + cap = read_pci_config_byte(0, 1, 0, PCI_CAPABILITY_LIST); + while (cap) { + u8 vndr = read_pci_config_byte(0, 1, 0, cap); + if (vndr == PCI_CAP_ID_VNDR) { + u8 type, bar; + u32 offset; + + type = read_pci_config_byte(0, 1, 0, + cap + offsetof(struct virtio_pci_cap, cfg_type)); + bar = read_pci_config_byte(0, 1, 0, + cap + offsetof(struct virtio_pci_cap, bar)); + offset = read_pci_config(0, 1, 0, + cap + offsetof(struct virtio_pci_cap, offset)); + + switch (type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + if (bar == 0) { + common_cap = cap; + common_offset = offset; + } + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + if (bar == 0) { + device_cap = cap; + device_offset = offset; + } + break; + case VIRTIO_PCI_CAP_PCI_CFG: + console_access_cap = cap; + break; + } + } + cap = read_pci_config_byte(0, 1, 0, cap + PCI_CAP_LIST_NEXT); + } + if (!common_cap || !device_cap || !console_access_cap) { + printk(KERN_ERR "lguest: No caps (%u/%u/%u) in console!\n", + common_cap, device_cap, console_access_cap); + return; + } + + +#define write_common_config(reg, val) \ + write_bar_via_cfg(console_access_cap, \ + common_offset+offsetof(struct virtio_pci_common_cfg,reg),\ + val) + +#define read_common_config(reg) \ + read_bar_via_cfg(console_access_cap, \ + common_offset+offsetof(struct virtio_pci_common_cfg,reg)) + + /* Check features: they must offer EMERG_WRITE */ + write_common_config(device_feature_select, 0); + + if (!(read_common_config(device_feature) + & (1 << VIRTIO_CONSOLE_F_EMERG_WRITE))) { + printk(KERN_ERR "lguest: console missing EMERG_WRITE\n"); + return; + } + + console_cfg_offset = device_offset; +} + /* * We will eventually use the virtio console device to produce console output, - * but before that is set up we use LHCALL_NOTIFY on normal memory to produce - * console output. + * but before that is set up we use the virtio PCI console's backdoor mmio + * access and the "emergency" write facility (which is legal even before the + * device is configured). */ static __init int early_put_chars(u32 vtermno, const char *buf, int count) { - char scratch[17]; - unsigned int len = count; + /* If we couldn't find PCI console, forget it. */ + if (console_cfg_offset < 0) + return count; - /* We use a nul-terminated string, so we make a copy. Icky, huh? */ - if (len > sizeof(scratch) - 1) - len = sizeof(scratch) - 1; - scratch[len] = '\0'; - memcpy(scratch, buf, len); - hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0); + if (unlikely(!console_cfg_offset)) { + probe_pci_console(); + if (console_cfg_offset < 0) + return count; + } - /* This routine returns the number of bytes actually written. */ - return len; + write_bar_via_cfg(console_access_cap, + console_cfg_offset + + offsetof(struct virtio_console_config, emerg_wr), + buf[0]); + return 1; } /* -- cgit v1.2.3-70-g09d2 From d9bab50aa46ce46dd4537d455eb13b200cdac516 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 11 Feb 2015 15:28:01 +1030 Subject: lguest: remove NOTIFY call and eventfd facility. Disappointing, as this was kind of neat (especially getting to use RCU to manage the address -> eventfd mapping). But now the devices are PCI handled in userspace, we get rid of both the NOTIFY hypercall and the interface to connect an eventfd. Signed-off-by: Rusty Russell --- arch/x86/include/asm/lguest_hcall.h | 1 - drivers/lguest/core.c | 20 +--- drivers/lguest/hypercalls.c | 4 - drivers/lguest/lg.h | 12 --- drivers/lguest/lguest_user.c | 186 +----------------------------------- include/linux/lguest_launcher.h | 2 +- 6 files changed, 10 insertions(+), 215 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index 879fd7d3387..ef01fef3eeb 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -16,7 +16,6 @@ #define LHCALL_SET_PTE 14 #define LHCALL_SET_PGD 15 #define LHCALL_LOAD_TLS 16 -#define LHCALL_NOTIFY 17 #define LHCALL_LOAD_GDT_ENTRY 18 #define LHCALL_SEND_INTERRUPTS 19 diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 9159dbc583f..7dc93aa004c 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -225,22 +225,12 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) if (cpu->hcall) do_hypercalls(cpu); - /* - * It's possible the Guest did a NOTIFY hypercall to the - * Launcher. - */ + /* Do we have to tell the Launcher about a trap? */ if (cpu->pending.trap) { - /* - * Does it just needs to write to a registered - * eventfd (ie. the appropriate virtqueue thread)? - */ - if (!send_notify_to_eventfd(cpu)) { - /* OK, we tell the main Launcher. */ - if (copy_to_user(user, &cpu->pending, - sizeof(cpu->pending))) - return -EFAULT; - return sizeof(cpu->pending); - } + if (copy_to_user(user, &cpu->pending, + sizeof(cpu->pending))) + return -EFAULT; + return sizeof(cpu->pending); } /* diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 5dd1fb8a661..1219af493c0 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -117,10 +117,6 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) /* Similarly, this sets the halted flag for run_guest(). */ cpu->halted = 1; break; - case LHCALL_NOTIFY: - cpu->pending.trap = LGUEST_TRAP_ENTRY; - cpu->pending.addr = args->arg1; - break; default: /* It should be an architecture-specific hypercall. */ if (lguest_arch_do_hcall(cpu, args)) diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index eb81abc0599..307e8b39e7d 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -81,16 +81,6 @@ struct lg_cpu { struct lg_cpu_arch arch; }; -struct lg_eventfd { - unsigned long addr; - struct eventfd_ctx *event; -}; - -struct lg_eventfd_map { - unsigned int num; - struct lg_eventfd map[]; -}; - /* The private info the thread maintains about the guest. */ struct lguest { struct lguest_data __user *lguest_data; @@ -117,8 +107,6 @@ struct lguest { unsigned int stack_pages; u32 tsc_khz; - struct lg_eventfd_map *eventfds; - /* Dead? */ const char *dead; }; diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index c8b0e8575b4..c4c6113eb9a 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -2,182 +2,20 @@ * launcher controls and communicates with the Guest. For example, * the first write will tell us the Guest's memory layout and entry * point. A read will run the Guest until something happens, such as - * a signal or the Guest doing a NOTIFY out to the Launcher. There is - * also a way for the Launcher to attach eventfds to particular NOTIFY - * values instead of returning from the read() call. + * a signal or the Guest accessing a device. :*/ #include #include #include #include -#include #include #include #include #include "lg.h" -/*L:056 - * Before we move on, let's jump ahead and look at what the kernel does when - * it needs to look up the eventfds. That will complete our picture of how we - * use RCU. - * - * The notification value is in cpu->pending_notify: we return true if it went - * to an eventfd. - */ -bool send_notify_to_eventfd(struct lg_cpu *cpu) -{ - unsigned int i; - struct lg_eventfd_map *map; - - /* We only connect LHCALL_NOTIFY to event fds, not other traps. */ - if (cpu->pending.trap != LGUEST_TRAP_ENTRY) - return false; - - /* - * This "rcu_read_lock()" helps track when someone is still looking at - * the (RCU-using) eventfds array. It's not actually a lock at all; - * indeed it's a noop in many configurations. (You didn't expect me to - * explain all the RCU secrets here, did you?) - */ - rcu_read_lock(); - /* - * rcu_dereference is the counter-side of rcu_assign_pointer(); it - * makes sure we don't access the memory pointed to by - * cpu->lg->eventfds before cpu->lg->eventfds is set. Sounds crazy, - * but Alpha allows this! Paul McKenney points out that a really - * aggressive compiler could have the same effect: - * http://lists.ozlabs.org/pipermail/lguest/2009-July/001560.html - * - * So play safe, use rcu_dereference to get the rcu-protected pointer: - */ - map = rcu_dereference(cpu->lg->eventfds); - /* - * Simple array search: even if they add an eventfd while we do this, - * we'll continue to use the old array and just won't see the new one. - */ - for (i = 0; i < map->num; i++) { - if (map->map[i].addr == cpu->pending.addr) { - eventfd_signal(map->map[i].event, 1); - cpu->pending.trap = 0; - break; - } - } - /* We're done with the rcu-protected variable cpu->lg->eventfds. */ - rcu_read_unlock(); - - /* If we cleared the notification, it's because we found a match. */ - return cpu->pending.trap == 0; -} - -/*L:055 - * One of the more tricksy tricks in the Linux Kernel is a technique called - * Read Copy Update. Since one point of lguest is to teach lguest journeyers - * about kernel coding, I use it here. (In case you're curious, other purposes - * include learning about virtualization and instilling a deep appreciation for - * simplicity and puppies). - * - * We keep a simple array which maps LHCALL_NOTIFY values to eventfds, but we - * add new eventfds without ever blocking readers from accessing the array. - * The current Launcher only does this during boot, so that never happens. But - * Read Copy Update is cool, and adding a lock risks damaging even more puppies - * than this code does. - * - * We allocate a brand new one-larger array, copy the old one and add our new - * element. Then we make the lg eventfd pointer point to the new array. - * That's the easy part: now we need to free the old one, but we need to make - * sure no slow CPU somewhere is still looking at it. That's what - * synchronize_rcu does for us: waits until every CPU has indicated that it has - * moved on to know it's no longer using the old one. - * - * If that's unclear, see http://en.wikipedia.org/wiki/Read-copy-update. - */ -static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) -{ - struct lg_eventfd_map *new, *old = lg->eventfds; - - /* - * We don't allow notifications on value 0 anyway (pending_notify of - * 0 means "nothing pending"). - */ - if (!addr) - return -EINVAL; - - /* - * Replace the old array with the new one, carefully: others can - * be accessing it at the same time. - */ - new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), - GFP_KERNEL); - if (!new) - return -ENOMEM; - - /* First make identical copy. */ - memcpy(new->map, old->map, sizeof(old->map[0]) * old->num); - new->num = old->num; - - /* Now append new entry. */ - new->map[new->num].addr = addr; - new->map[new->num].event = eventfd_ctx_fdget(fd); - if (IS_ERR(new->map[new->num].event)) { - int err = PTR_ERR(new->map[new->num].event); - kfree(new); - return err; - } - new->num++; - - /* - * Now put new one in place: rcu_assign_pointer() is a fancy way of - * doing "lg->eventfds = new", but it uses memory barriers to make - * absolutely sure that the contents of "new" written above is nailed - * down before we actually do the assignment. - * - * We have to think about these kinds of things when we're operating on - * live data without locks. - */ - rcu_assign_pointer(lg->eventfds, new); - - /* - * We're not in a big hurry. Wait until no one's looking at old - * version, then free it. - */ - synchronize_rcu(); - kfree(old); - - return 0; -} - /*L:052 - * Receiving notifications from the Guest is usually done by attaching a - * particular LHCALL_NOTIFY value to an event filedescriptor. The eventfd will - * become readable when the Guest does an LHCALL_NOTIFY with that value. - * - * This is really convenient for processing each virtqueue in a separate - * thread. - */ -static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) -{ - unsigned long addr, fd; - int err; - - if (get_user(addr, input) != 0) - return -EFAULT; - input++; - if (get_user(fd, input) != 0) - return -EFAULT; - - /* - * Just make sure two callers don't add eventfds at once. We really - * only need to lock against callers adding to the same Guest, so using - * the Big Lguest Lock is overkill. But this is setup, not a fast path. - */ - mutex_lock(&lguest_lock); - err = add_eventfd(lg, addr, fd); - mutex_unlock(&lguest_lock); - - return err; -} - -/* The Launcher can get the registers, and also set some of them. */ + The Launcher can get the registers, and also set some of them. +*/ static int getreg_setup(struct lg_cpu *cpu, const unsigned long __user *input) { unsigned long which; @@ -409,13 +247,6 @@ static int initialize(struct file *file, const unsigned long __user *input) goto unlock; } - lg->eventfds = kmalloc(sizeof(*lg->eventfds), GFP_KERNEL); - if (!lg->eventfds) { - err = -ENOMEM; - goto free_lg; - } - lg->eventfds->num = 0; - /* Populate the easy fields of our "struct lguest" */ lg->mem_base = (void __user *)args[0]; lg->pfn_limit = args[1]; @@ -424,7 +255,7 @@ static int initialize(struct file *file, const unsigned long __user *input) /* This is the first cpu (cpu 0) and it will start booting at args[2] */ err = lg_cpu_start(&lg->cpus[0], 0, args[2]); if (err) - goto free_eventfds; + goto free_lg; /* * Initialize the Guest's shadow page tables. This allocates @@ -445,8 +276,6 @@ static int initialize(struct file *file, const unsigned long __user *input) free_regs: /* FIXME: This should be in free_vcpu */ free_page(lg->cpus[0].regs_page); -free_eventfds: - kfree(lg->eventfds); free_lg: kfree(lg); unlock: @@ -499,8 +328,6 @@ static ssize_t write(struct file *file, const char __user *in, return initialize(file, input); case LHREQ_IRQ: return user_send_irq(cpu, input); - case LHREQ_EVENTFD: - return attach_eventfd(lg, input); case LHREQ_GETREG: return getreg_setup(cpu, input); case LHREQ_SETREG: @@ -551,11 +378,6 @@ static int close(struct inode *inode, struct file *file) mmput(lg->cpus[i].mm); } - /* Release any eventfds they registered. */ - for (i = 0; i < lg->eventfds->num; i++) - eventfd_ctx_put(lg->eventfds->map[i].event); - kfree(lg->eventfds); - /* * If lg->dead doesn't contain an error code it will be NULL or a * kmalloc()ed string, either of which is ok to hand to kfree(). diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index 677cde735d4..acd5b12565c 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -23,7 +23,7 @@ enum lguest_req LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* No longer used */ - LHREQ_EVENTFD, /* + address, fd. */ + LHREQ_EVENTFD, /* No longer used. */ LHREQ_GETREG, /* + offset within struct pt_regs (then read value). */ LHREQ_SETREG, /* + offset within struct pt_regs, value. */ LHREQ_TRAP, /* + trap number to deliver to guest. */ -- cgit v1.2.3-70-g09d2 From 55c2d7884e9a97c2f2d46d5818f783bf3dcc5314 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Feb 2015 17:13:43 +1030 Subject: lguest: don't look in console features to find emerg_wr. The 1.0 spec clearly states that you must set the ACKNOWLEDGE and DRIVER status bits before accessing the feature bits. This is a problem for the early console code, which doesn't really want to acknowledge the device (the spec specifically excepts writing to the console's emerg_wr from the usual ordering constrains). Instead, we check that the *size* of the device configuration is sufficient to hold emerg_wr: at worst (if the device doesn't support the VIRTIO_CONSOLE_F_EMERG_WRITE feature), it will ignore the writes. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 57 +++++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 531b844cb48..ac4453d8520 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1222,15 +1222,13 @@ static void set_cfg_window(u32 cfg_offset, u32 off) off); } -static u32 read_bar_via_cfg(u32 cfg_offset, u32 off) -{ - set_cfg_window(cfg_offset, off); - return read_pci_config(0, 1, 0, - cfg_offset + sizeof(struct virtio_pci_cap)); -} - static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val) { + /* + * We could set this up once, then leave it; nothing else in the * + * kernel should touch these registers. But if it went wrong, that + * would be a horrible bug to find. + */ set_cfg_window(cfg_offset, off); write_pci_config(0, 1, 0, cfg_offset + sizeof(struct virtio_pci_cap), val); @@ -1239,8 +1237,9 @@ static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val) static void probe_pci_console(void) { u8 cap, common_cap = 0, device_cap = 0; - /* Offsets within BAR0 */ - u32 common_offset, device_offset; + /* Offset within BAR0 */ + u32 device_offset; + u32 device_len; /* Avoid recursive printk into here. */ console_cfg_offset = -1; @@ -1263,7 +1262,7 @@ static void probe_pci_console(void) u8 vndr = read_pci_config_byte(0, 1, 0, cap); if (vndr == PCI_CAP_ID_VNDR) { u8 type, bar; - u32 offset; + u32 offset, length; type = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, cfg_type)); @@ -1271,18 +1270,15 @@ static void probe_pci_console(void) cap + offsetof(struct virtio_pci_cap, bar)); offset = read_pci_config(0, 1, 0, cap + offsetof(struct virtio_pci_cap, offset)); + length = read_pci_config(0, 1, 0, + cap + offsetof(struct virtio_pci_cap, length)); switch (type) { - case VIRTIO_PCI_CAP_COMMON_CFG: - if (bar == 0) { - common_cap = cap; - common_offset = offset; - } - break; case VIRTIO_PCI_CAP_DEVICE_CFG: if (bar == 0) { device_cap = cap; device_offset = offset; + device_len = length; } break; case VIRTIO_PCI_CAP_PCI_CFG: @@ -1292,32 +1288,27 @@ static void probe_pci_console(void) } cap = read_pci_config_byte(0, 1, 0, cap + PCI_CAP_LIST_NEXT); } - if (!common_cap || !device_cap || !console_access_cap) { + if (!device_cap || !console_access_cap) { printk(KERN_ERR "lguest: No caps (%u/%u/%u) in console!\n", common_cap, device_cap, console_access_cap); return; } - -#define write_common_config(reg, val) \ - write_bar_via_cfg(console_access_cap, \ - common_offset+offsetof(struct virtio_pci_common_cfg,reg),\ - val) - -#define read_common_config(reg) \ - read_bar_via_cfg(console_access_cap, \ - common_offset+offsetof(struct virtio_pci_common_cfg,reg)) - - /* Check features: they must offer EMERG_WRITE */ - write_common_config(device_feature_select, 0); - - if (!(read_common_config(device_feature) - & (1 << VIRTIO_CONSOLE_F_EMERG_WRITE))) { - printk(KERN_ERR "lguest: console missing EMERG_WRITE\n"); + /* + * Note that we can't check features, until we've set the DRIVER + * status bit. We don't want to do that until we have a real driver, + * so we just check that the device-specific config has room for + * emerg_wr. If it doesn't support VIRTIO_CONSOLE_F_EMERG_WRITE + * it should ignore the access. + */ + if (device_len < (offsetof(struct virtio_console_config, emerg_wr) + + sizeof(u32))) { + printk(KERN_ERR "lguest: console missing emerg_wr field\n"); return; } console_cfg_offset = device_offset; + printk(KERN_INFO "lguest: Console via virtio-pci emerg_wr\n"); } /* -- cgit v1.2.3-70-g09d2