From 8c86e70acead629aacb4afcd818add66bf6844d9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 8 Aug 2014 14:25:50 -0700 Subject: resource: provide new functions to walk through resources I have added two more functions to walk through resources. Currently walk_system_ram_range() deals with pfn and /proc/iomem can contain partial pages. By dealing in pfn, callback function loses the info that last page of a memory range is a partial page and not the full page. So I implemented walk_system_ram_res() which returns u64 values to callback functions and now it properly return start and end address. walk_system_ram_range() uses find_next_system_ram() to find the next ram resource. This in turn only travels through siblings of top level child and does not travers through all the nodes of the resoruce tree. I also need another function where I can walk through all the resources, for example figure out where "GART" aperture is. Figure out where ACPI memory is. So I wrote another function walk_iomem_res() which walks through all /proc/iomem resources and returns matches as asked by caller. Caller can specify "name" of resource, start and end and flags. Got rid of find_next_system_ram_res() and instead implemented more generic find_next_iomem_res() which can be used to traverse top level children only based on an argument. Signed-off-by: Vivek Goyal Cc: Yinghai Lu Cc: Borislav Petkov Cc: Michael Kerrisk Cc: Eric Biederman Cc: H. Peter Anvin Cc: Matthew Garrett Cc: Greg Kroah-Hartman Cc: Dave Young Cc: WANG Chao Cc: Baoquan He Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 9 deletions(-) (limited to 'kernel/resource.c') diff --git a/kernel/resource.c b/kernel/resource.c index 3c2237ac32d..da14b8d0929 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock); static struct resource *bootmem_resource_free; static DEFINE_SPINLOCK(bootmem_resource_lock); -static void *r_next(struct seq_file *m, void *v, loff_t *pos) +static struct resource *next_resource(struct resource *p, bool sibling_only) { - struct resource *p = v; - (*pos)++; + /* Caller wants to traverse through siblings only */ + if (sibling_only) + return p->sibling; + if (p->child) return p->child; while (!p->sibling && p->parent) @@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) return p->sibling; } +static void *r_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct resource *p = v; + (*pos)++; + return (void *)next_resource(p, false); +} + #ifdef CONFIG_PROC_FS enum { MAX_IORES_LEVEL = 5 }; @@ -322,16 +331,19 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); -#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* - * Finds the lowest memory reosurce exists within [res->start.res->end) + * Finds the lowest iomem reosurce exists with-in [res->start.res->end) * the caller must specify res->start, res->end, res->flags and "name". * If found, returns 0, res is overwritten, if not found, returns -1. + * This walks through whole tree and not just first level children + * until and unless first_level_children_only is true. */ -static int find_next_system_ram(struct resource *res, char *name) +static int find_next_iomem_res(struct resource *res, char *name, + bool first_level_children_only) { resource_size_t start, end; struct resource *p; + bool sibling_only = false; BUG_ON(!res); @@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name) BUG_ON(start >= end); read_lock(&resource_lock); - for (p = iomem_resource.child; p ; p = p->sibling) { - /* system ram is just marked as IORESOURCE_MEM */ + + if (first_level_children_only) { + p = iomem_resource.child; + sibling_only = true; + } else + p = &iomem_resource; + + while ((p = next_resource(p, sibling_only))) { if (p->flags != res->flags) continue; if (name && strcmp(p->name, name)) @@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name) if ((p->end >= start) && (p->start < end)) break; } + read_unlock(&resource_lock); if (!p) return -1; @@ -364,6 +383,70 @@ static int find_next_system_ram(struct resource *res, char *name) return 0; } +/* + * Walks through iomem resources and calls func() with matching resource + * ranges. This walks through whole tree and not just first level children. + * All the memory ranges which overlap start,end and also match flags and + * name are valid candidates. + * + * @name: name of resource + * @flags: resource flags + * @start: start addr + * @end: end addr + */ +int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, + void *arg, int (*func)(u64, u64, void *)) +{ + struct resource res; + u64 orig_end; + int ret = -1; + + res.start = start; + res.end = end; + res.flags = flags; + orig_end = res.end; + while ((res.start < res.end) && + (!find_next_iomem_res(&res, name, false))) { + ret = (*func)(res.start, res.end, arg); + if (ret) + break; + res.start = res.end + 1; + res.end = orig_end; + } + return ret; +} + +/* + * This function calls callback against all memory range of "System RAM" + * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. + * Now, this function is only for "System RAM". This function deals with + * full ranges and not pfn. If resources are not pfn aligned, dealing + * with pfn can truncate ranges. + */ +int walk_system_ram_res(u64 start, u64 end, void *arg, + int (*func)(u64, u64, void *)) +{ + struct resource res; + u64 orig_end; + int ret = -1; + + res.start = start; + res.end = end; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + orig_end = res.end; + while ((res.start < res.end) && + (!find_next_iomem_res(&res, "System RAM", true))) { + ret = (*func)(res.start, res.end, arg); + if (ret) + break; + res.start = res.end + 1; + res.end = orig_end; + } + return ret; +} + +#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) + /* * This function calls callback against all memory range of "System RAM" * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. @@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (find_next_system_ram(&res, "System RAM") >= 0)) { + (find_next_iomem_res(&res, "System RAM", true) >= 0)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) -- cgit v1.2.3-70-g09d2 From 800df627e2eabaf4a921d342a1d5162c843b7fc2 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 29 Aug 2014 15:18:29 -0700 Subject: resource: fix the case of null pointer access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richard and Daniel reported that UML is broken due to changes to resource traversal functions. Problem is that iomem_resource.child can be null and new code does not consider that possibility. Old code used a for loop and that loop will not even execute if p was null. Revert back to for() loop logic and bail out if p is null. I also moved sibling_only check out of resource_lock. There is no reason to keep it inside the lock. Following is backtrace of the UML crash. RIP: 0033:[<0000000060039b9f>] RSP: 0000000081459da0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000219b3fff RCX: 000000006010d1d9 RDX: 0000000000000001 RSI: 00000000602dfb94 RDI: 0000000081459df8 RBP: 0000000081459de0 R08: 00000000601b59f4 R09: ffffffff0000ff00 R10: ffffffff0000ff00 R11: 0000000081459e88 R12: 0000000081459df8 R13: 00000000219b3fff R14: 00000000602dfb94 R15: 0000000000000000 Kernel panic - not syncing: Segfault with no mm CPU: 0 PID: 1 Comm: swapper Not tainted 3.16.0-10454-g58d08e3 #13 Stack: 00000000 000080d0 81459df0 219b3fff 81459e70 6010d1d9 ffffffff 6033e010 81459e50 6003a269 81459e30 00000000 Call Trace: [<6010d1d9>] ? kclist_add_private+0x0/0xe7 [<6003a269>] walk_system_ram_range+0x61/0xb7 [<6000e859>] ? proc_kcore_init+0x0/0xf1 [<6010d574>] kcore_update_ram+0x4c/0x168 [<6010d72e>] ? kclist_add+0x0/0x2e [<6000e943>] proc_kcore_init+0xea/0xf1 [<6000e859>] ? proc_kcore_init+0x0/0xf1 [<6000e859>] ? proc_kcore_init+0x0/0xf1 [<600189f0>] do_one_initcall+0x13c/0x204 [<6004ca46>] ? parse_args+0x1df/0x2e0 [<6004c82d>] ? parameq+0x0/0x3a [<601b5990>] ? strcpy+0x0/0x18 [<60001e1a>] kernel_init_freeable+0x240/0x31e [<6026f1c0>] kernel_init+0x12/0x148 [<60019fad>] new_thread_handler+0x81/0xa3 Fixes 8c86e70acead629aacb4a ("resource: provide new functions to walk through resources"). Reported-by: Daniel Walter Tested-by: Richard Weinberger Tested-by: Toralf Förster Tested-by: Daniel Walter Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/resource.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel/resource.c') diff --git a/kernel/resource.c b/kernel/resource.c index da14b8d0929..60c5a3856ab 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -351,15 +351,12 @@ static int find_next_iomem_res(struct resource *res, char *name, end = res->end; BUG_ON(start >= end); - read_lock(&resource_lock); - - if (first_level_children_only) { - p = iomem_resource.child; + if (first_level_children_only) sibling_only = true; - } else - p = &iomem_resource; - while ((p = next_resource(p, sibling_only))) { + read_lock(&resource_lock); + + for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) { if (p->flags != res->flags) continue; if (name && strcmp(p->name, name)) -- cgit v1.2.3-70-g09d2 From 8d38821cbcf51292cd5a23469d03bd38932a3ba9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Aug 2014 14:15:10 +0200 Subject: resources: Add device-managed request/release_resource() Provide device-managed implementations of the request_resource() and release_resource() functions. Upon failure to request a resource, the new devm_request_resource() function will output an error message for consistent error reporting. Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas Acked-by: Tejun Heo --- Documentation/driver-model/devres.txt | 2 + include/linux/ioport.h | 5 +++ kernel/resource.c | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'kernel/resource.c') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index d14710b0443..befc3fe12ba 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -264,8 +264,10 @@ IIO IO region devm_release_mem_region() devm_release_region() + devm_release_resource() devm_request_mem_region() devm_request_region() + devm_request_resource() IOMAP devm_ioport_map() diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 142ec544167..2c525022227 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -215,6 +215,11 @@ static inline int __deprecated check_region(resource_size_t s, /* Wrappers for managed devices */ struct device; + +extern int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new); +extern void devm_release_resource(struct device *dev, struct resource *new); + #define devm_request_region(dev,start,n,name) \ __devm_request_region(dev, &ioport_resource, (start), (n), (name)) #define devm_request_mem_region(dev,start,n,name) \ diff --git a/kernel/resource.c b/kernel/resource.c index da14b8d0929..ca24f19f9d1 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1248,6 +1248,76 @@ int release_mem_region_adjustable(struct resource *parent, /* * Managed region resource */ +static void devm_resource_release(struct device *dev, void *ptr) +{ + struct resource **r = ptr; + + release_resource(*r); +} + +/** + * devm_request_resource() - request and reserve an I/O or memory resource + * @dev: device for which to request the resource + * @root: root of the resource tree from which to request the resource + * @new: descriptor of the resource to request + * + * This is a device-managed version of request_resource(). There is usually + * no need to release resources requested by this function explicitly since + * that will be taken care of when the device is unbound from its driver. + * If for some reason the resource needs to be released explicitly, because + * of ordering issues for example, drivers must call devm_release_resource() + * rather than the regular release_resource(). + * + * When a conflict is detected between any existing resources and the newly + * requested resource, an error message will be printed. + * + * Returns 0 on success or a negative error code on failure. + */ +int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new) +{ + struct resource *conflict, **ptr; + + ptr = devres_alloc(devm_resource_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + *ptr = new; + + conflict = request_resource_conflict(root, new); + if (conflict) { + dev_err(dev, "resource collision: %pR conflicts with %s %pR\n", + new, conflict->name, conflict); + devres_free(ptr); + return -EBUSY; + } + + devres_add(dev, ptr); + return 0; +} +EXPORT_SYMBOL(devm_request_resource); + +static int devm_resource_match(struct device *dev, void *res, void *data) +{ + struct resource **ptr = res; + + return *ptr == data; +} + +/** + * devm_release_resource() - release a previously requested resource + * @dev: device for which to release the resource + * @new: descriptor of the resource to release + * + * Releases a resource previously requested using devm_request_resource(). + */ +void devm_release_resource(struct device *dev, struct resource *new) +{ + WARN_ON(devres_release(dev, devm_resource_release, devm_resource_match, + new)); +} +EXPORT_SYMBOL(devm_release_resource); + struct region_devres { struct resource *parent; resource_size_t start; -- cgit v1.2.3-70-g09d2 From 67cf13ceed89e2c1a967719e98624a20c48dfb5a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 13 Oct 2014 15:54:03 -0700 Subject: x86: optimize resource lookups for ioremap We have a large university system in the UK that is experiencing very long delays modprobing the driver for a specific I/O device. The delay is from 8-10 minutes per device and there are 31 devices in the system. This 4 to 5 hour delay in starting up those I/O devices is very much a burden on the customer. There are two causes for requiring a restart/reload of the drivers. First is periodic preventive maintenance (PM) and the second is if any of the devices experience a fatal error. Both of these trigger this excessively long delay in bringing the system back up to full capability. The problem was tracked down to a very slow IOREMAP operation and the excessively long ioresource lookup to insure that the user is not attempting to ioremap RAM. These patches provide a speed up to that function. The modprobe time appears to be affected quite a bit by previous activity on the ioresource list, which I suspect is due to cache preloading. While the overall improvement is impacted by other overhead of starting the devices, this drastically improves the modprobe time. Also our system is considerably smaller so the percentages gained will not be the same. Best case improvement with the modprobe on our 20 device smallish system was from 'real 5m51.913s' to 'real 0m18.275s'. This patch (of 2): Since the ioremap operation is verifying that the specified address range is NOT RAM, it will search the entire ioresource list if the condition is true. To make matters worse, it does this one 4k page at a time. For a 128M BAR region this is 32 passes to determine the entire region does not contain any RAM addresses. This patch provides another resource lookup function, region_is_ram, that searches for the entire region specified, verifying that it is completely contained within the resource region. If it is found, then it is checked to be RAM or not, within a single pass. The return result reflects if it was found or not (-1), and whether it is RAM (1) or not (0). This allows the caller to fallback to the previous page by page search if it was not found. [akpm@linux-foundation.org: fix spellos and typos in comment] Signed-off-by: Mike Travis Acked-by: Alex Thorlton Reviewed-by: Cliff Wickman Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Mark Salter Cc: Dave Young Cc: Rik van Riel Cc: Peter Zijlstra Cc: Mel Gorman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + kernel/resource.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'kernel/resource.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa0d74e0642..4cd45cb95e6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -347,6 +347,7 @@ static inline int put_page_unless_one(struct page *page) } extern int page_is_ram(unsigned long pfn); +extern int region_is_ram(resource_size_t phys_addr, unsigned long size); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); diff --git a/kernel/resource.c b/kernel/resource.c index 46322019ab7..0bcebffc4e7 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -491,6 +491,42 @@ int __weak page_is_ram(unsigned long pfn) } EXPORT_SYMBOL_GPL(page_is_ram); +/* + * Search for a resouce entry that fully contains the specified region. + * If found, return 1 if it is RAM, 0 if not. + * If not found, or region is not fully contained, return -1 + * + * Used by the ioremap functions to ensure the user is not remapping RAM and is + * a vast speed up over walking through the resource table page by page. + */ +int region_is_ram(resource_size_t start, unsigned long size) +{ + struct resource *p; + resource_size_t end = start + size - 1; + int flags = IORESOURCE_MEM | IORESOURCE_BUSY; + const char *name = "System RAM"; + int ret = -1; + + read_lock(&resource_lock); + for (p = iomem_resource.child; p ; p = p->sibling) { + if (end < p->start) + continue; + + if (p->start <= start && end <= p->end) { + /* resource fully contains region */ + if ((p->flags != flags) || strcmp(p->name, name)) + ret = 0; + else + ret = 1; + break; + } + if (p->end < start) + break; /* not found */ + } + read_unlock(&resource_lock); + return ret; +} + void __weak arch_remove_reservations(struct resource *avail) { } -- cgit v1.2.3-70-g09d2