From 2d7c1b77dd59387070aab355532dd157f888325c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:07 +0100 Subject: ACPI / hotplug / PCI: Remove entries from bus->devices in reverse order According to the changelog of commit 29ed1f29b68a (PCI: pciehp: Fix null pointer deref when hot-removing SR-IOV device) it is unsafe to walk the bus->devices list of a PCI bus and remove devices from it in direct order, because that may lead to NULL pointer dereferences related to virtual functions. For this reason, change all of the bus->devices list walks in acpiphp_glue.c during which devices may be removed to be carried out in reverse order. Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/pci/hotplug/acpiphp_glue.c') diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index cd929aed361..6a4b4b734fb 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -742,7 +742,7 @@ static void trim_stale_devices(struct pci_dev *dev) /* The device is a bridge. so check the bus below it. 
*/ pm_runtime_get_sync(&dev->dev); - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) + list_for_each_entry_safe_reverse(child, tmp, &bus->devices, bus_list) trim_stale_devices(child); pm_runtime_put(&dev->dev); @@ -773,8 +773,8 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) ; /* do nothing */ } else if (get_slot_status(slot) == ACPI_STA_ALL) { /* remove stale devices if any */ - list_for_each_entry_safe(dev, tmp, &bus->devices, - bus_list) + list_for_each_entry_safe_reverse(dev, tmp, + &bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->device) trim_stale_devices(dev); @@ -805,7 +805,7 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus) int i; unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM; - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) { for (i=0; i<PCI_BRIDGE_RESOURCES; i++) { struct resource *res = &dev->resource[i]; if ((res->flags & type_mask) && !res->start && -- cgit v1.2.3-70-g09d2 From f41b32613138ae05329a0f0e7170223b775d6b24 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:17 +0100 Subject: ACPI / hotplug / PCI: Move PCI rescan-remove locking to hotplug_event() Commit 9217a984671e (ACPI / hotplug / PCI: Use global PCI rescan-remove locking) modified ACPIPHP to protect its PCI device removal and addition code paths from races against sysfs-driven rescan and remove operations with the help of PCI rescan-remove locking. However, it overlooked the fact that hotplug_event_work() is not the only caller of hotplug_event() which may also be called by dock_hotplug_event() and that code path is missing the PCI rescan-remove locking. This means that, although the PCI rescan-remove lock is held as appropriate during the handling of events originating from handle_hotplug_event(), the ACPIPHP's operations resulting from dock events may still suffer the race conditions that commit 9217a984671e was supposed to eliminate. 
To address that problem, move the PCI rescan-remove locking from hotplug_event_work() to hotplug_event() so that it is used regardless of the way that function is invoked. Fixes: 9217a984671e (ACPI / hotplug / PCI: Use global PCI rescan-remove locking) Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/pci/hotplug/acpiphp_glue.c') diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6a4b4b734fb..6e5bd79af81 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -852,6 +852,7 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) mutex_unlock(&acpiphp_context_lock); + pci_lock_rescan_remove(); acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); switch (type) { @@ -905,6 +906,7 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data) break; } + pci_unlock_rescan_remove(); if (bridge) put_bridge(bridge); } @@ -915,11 +917,9 @@ static void hotplug_event_work(void *data, u32 type) acpi_handle handle = context->handle; acpi_scan_lock_acquire(); - pci_lock_rescan_remove(); hotplug_event(handle, type, context); - pci_unlock_rescan_remove(); acpi_scan_lock_release(); acpi_evaluate_hotplug_ost(handle, type, ACPI_OST_SC_SUCCESS, NULL); put_bridge(context->func.parent); -- cgit v1.2.3-70-g09d2 From d42f5da2340083301dd2c48ff2d75f6ce4b30767 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 02:22:27 +0100 Subject: ACPI / hotplug / PCI: Scan root bus under the PCI rescan-remove lock Since acpiphp_check_bridge() called by acpiphp_check_host_bridge() does things that require PCI rescan-remove locking around it, make acpiphp_check_host_bridge() use that locking. Signed-off-by: Rafael J. 
Wysocki Tested-by: Mika Westerberg --- drivers/pci/hotplug/acpiphp_glue.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/pci/hotplug/acpiphp_glue.c') diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6e5bd79af81..931d0b44eac 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -829,7 +829,11 @@ void acpiphp_check_host_bridge(acpi_handle handle) bridge = acpiphp_handle_to_bridge(handle); if (bridge) { + pci_lock_rescan_remove(); + acpiphp_check_bridge(bridge); + + pci_unlock_rescan_remove(); put_bridge(bridge); } } -- cgit v1.2.3-70-g09d2 From 1b360f44d009059e446532f29c1a889951e72667 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 22:30:06 +0100 Subject: ACPI / hotplug / PCI: Fix bridge removal race in handle_hotplug_event() If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (child context) Now, if a hotplug notify is dispatched for one of the bridge's children and the timing is such that handle_hotplug_event() for that notify is executed while free_bridge() above is running, the get_bridge(context->func.parent) in handle_hotplug_event() will not really help, because it is too late to prevent the bridge from going away and the child's context may be freed before hotplug_event_work() scheduled from handle_hotplug_event() dereferences the pointer to it passed via the data argument. That will cause a kernel crash to happen in hotplug_event_work(). To prevent that from happening, make handle_hotplug_event() check the is_going_away flag of the function's parent bridge (under acpiphp_context_lock) and bail out if it's set. 
Also, make cleanup_bridge() set the bridge's is_going_away flag under acpiphp_context_lock so that it cannot be changed between the check and the subsequent get_bridge(context->func.parent) in handle_hotplug_event(). Then, in the above scenario, handle_hotplug_event() will notice that context->func.parent->is_going_away is already set and it will exit immediately preventing the crash from happening. Signed-off-by: Rafael J. Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers/pci/hotplug/acpiphp_glue.c') diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 931d0b44eac..91eceaf3131 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -441,7 +441,9 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) list_del(&bridge->list); mutex_unlock(&bridge_mutex); + mutex_lock(&acpiphp_context_lock); bridge->is_going_away = true; + mutex_unlock(&acpiphp_context_lock); } /** @@ -941,6 +943,7 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) { struct acpiphp_context *context; u32 ost_code = ACPI_OST_SC_SUCCESS; + acpi_status status; switch (type) { case ACPI_NOTIFY_BUS_CHECK: @@ -976,13 +979,20 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data) mutex_lock(&acpiphp_context_lock); context = acpiphp_get_context(handle); - if (context && !WARN_ON(context->handle != handle)) { - get_bridge(context->func.parent); - acpiphp_put_context(context); - acpi_hotplug_execute(hotplug_event_work, context, type); + if (!context || WARN_ON(context->handle != handle) + || context->func.parent->is_going_away) + goto err_out; + + get_bridge(context->func.parent); + acpiphp_put_context(context); + status = acpi_hotplug_execute(hotplug_event_work, context, type); + if (ACPI_SUCCESS(status)) { mutex_unlock(&acpiphp_context_lock); return; } + put_bridge(context->func.parent); + + 
err_out: mutex_unlock(&acpiphp_context_lock); ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; -- cgit v1.2.3-70-g09d2 From af9d8adc6b832003bbe3d83fde665ae6b4f072eb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2014 22:30:15 +0100 Subject: ACPI / hotplug / PCI: Fix bridge removal race vs dock events If a PCI bridge with an ACPIPHP context attached is removed via sysfs, the code path executed as a result is the following: pci_stop_and_remove_bus_device_locked pci_remove_bus pcibios_remove_bus acpi_pci_remove_bus acpiphp_remove_slots cleanup_bridge unregister_hotplug_dock_device (drops dock references to the bridge) put_bridge free_bridge acpiphp_put_context (for each child, under context lock) kfree (context) Now, if a dock event affecting one of the bridge's child devices occurs (roughly at the same time), it will lead to the following code path: acpi_dock_deferred_cb dock_notify handle_eject_request hot_remove_dock_devices dock_hotplug_event hotplug_event (dereferences context) That may lead to a kernel crash in hotplug_event() if it is executed after the last kfree() in the bridge removal code path. To prevent that from happening, add a wrapper around hotplug_event() called dock_event() and point the .handler pointer in acpiphp_dock_ops to it. Make that wrapper retrieve the device's ACPIPHP context using acpiphp_get_context() (instead of taking it from the data argument) under acpiphp_context_lock and check if the parent bridge's is_going_away flag is set. If that flag is set, it will return immediately and if it is not set it will grab a reference to the device's parent bridge before executing hotplug_event(). Then, in the above scenario, the reference to the parent bridge held by dock_event() will prevent free_bridge() from being executed for it until hotplug_event() returns. Signed-off-by: Rafael J. 
Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers/pci/hotplug/acpiphp_glue.c') diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 91eceaf3131..e2a783fdb98 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -210,10 +210,29 @@ static void post_dock_fixups(acpi_handle not_used, u32 event, void *data) } } +static void dock_event(acpi_handle handle, u32 type, void *data) +{ + struct acpiphp_context *context; + + mutex_lock(&acpiphp_context_lock); + context = acpiphp_get_context(handle); + if (!context || WARN_ON(context->handle != handle) + || context->func.parent->is_going_away) { + mutex_unlock(&acpiphp_context_lock); + return; + } + get_bridge(context->func.parent); + acpiphp_put_context(context); + mutex_unlock(&acpiphp_context_lock); + + hotplug_event(handle, type, data); + + put_bridge(context->func.parent); +} static const struct acpi_dock_ops acpiphp_dock_ops = { .fixup = post_dock_fixups, - .handler = hotplug_event, + .handler = dock_event, }; /* Check whether the PCI device is managed by native PCIe hotplug driver */ -- cgit v1.2.3-70-g09d2 From 7282059489868e0ed1b0d79765730c6b233a8399 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 11 Feb 2014 12:42:37 +0200 Subject: ACPI / hotplug / PCI: Relax the checking of _STA return values The ACPI specification (ACPI 5.0A, Section 6.3.7) says: _STA may return bit 0 clear (not present) with bit 3 set (device is functional). This case is used to indicate a valid device for which no device driver should be loaded (for example, a bridge device.) Children of this device may be present and valid. OSPM should continue enumeration below a device whose _STA returns this bit combination. 
Evidently, some BIOSes follow that and return 0x0A from _STA, which causes problems to happen when they trigger bus check or device check notifications for those devices too. Namely, ACPIPHP thinks that they are gone and may drop them, for example, if such a notification is triggered during a resume from system suspend. To fix that, modify ACPIPHP to regard devices as present and functioning if _STA returns both the ACPI_STA_DEVICE_ENABLED and ACPI_STA_DEVICE_FUNCTIONING bits set for them. Reported-and-tested-by: Peter Wu Cc: 3.12+ # 3.12+ [rjw: Subject and changelog, minor code modifications] Signed-off-by: Rafael J. Wysocki --- drivers/pci/hotplug/acpiphp_glue.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/pci/hotplug/acpiphp_glue.c') diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index e2a783fdb98..7c7a388c85a 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -730,6 +730,17 @@ static unsigned int get_slot_status(struct acpiphp_slot *slot) return (unsigned int)sta; } +static inline bool device_status_valid(unsigned int sta) +{ + /* + * ACPI spec says that _STA may return bit 0 clear with bit 3 set + * if the device is valid but does not require a device driver to be + * loaded (Section 6.3.7 of ACPI 5.0A). + */ + unsigned int mask = ACPI_STA_DEVICE_ENABLED | ACPI_STA_DEVICE_FUNCTIONING; + return (sta & mask) == mask; +} + /** * trim_stale_devices - remove PCI devices that are not responding. * @dev: PCI device to start walking the hierarchy from. 
@@ -745,7 +756,7 @@ static void trim_stale_devices(struct pci_dev *dev) unsigned long long sta; status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - alive = (ACPI_SUCCESS(status) && sta == ACPI_STA_ALL) + alive = (ACPI_SUCCESS(status) && device_status_valid(sta)) || acpiphp_no_hotplug(handle); } if (!alive) { @@ -792,7 +803,7 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) mutex_lock(&slot->crit_sect); if (slot_no_hotplug(slot)) { ; /* do nothing */ - } else if (get_slot_status(slot) == ACPI_STA_ALL) { + } else if (device_status_valid(get_slot_status(slot))) { /* remove stale devices if any */ list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) -- cgit v1.2.3-70-g09d2