From 9109fb7b3520de187ebc3646c209d66a233f7169 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 23 Jul 2008 21:27:20 -0700 Subject: mm: drop unneeded pgdat argument from free_area_init_node() free_area_init_node() gets passed in the node id as well as the node descriptor. This is redundant as the function can trivially get the node descriptor itself by means of NODE_DATA() and the node's id. I checked all the users and NODE_DATA() seems to be usable everywhere from where this function is called. Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 833f854eabe..6e26adc08f1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -455,7 +455,7 @@ static pg_data_t *hotadd_new_pgdat(int nid, u64 start) /* we can use NODE_DATA(nid) from here */ /* init node's zones as empty zones, we don't have any present pages.*/ - free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size); + free_area_init_node(nid, zones_size, start_pfn, zholes_size); return pgdat; } -- cgit v1.2.3-70-g09d2 From d92bc318547507a944a22e7ef936793dc0fe167f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 23 Jul 2008 21:28:12 -0700 Subject: mm: make register_page_bootmem_info_section() static Make the needlessly global register_page_bootmem_info_section() static. Signed-off-by: Adrian Bunk Acked-by: Yasunori Goto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6e26adc08f1..ec85c37dcfb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -86,7 +86,7 @@ void put_page_bootmem(struct page *page) } -void register_page_bootmem_info_section(unsigned long start_pfn) +static void register_page_bootmem_info_section(unsigned long start_pfn) { unsigned long *usemap, mapsize, section_nr, i; struct mem_section *ms; -- cgit v1.2.3-70-g09d2 From af370fb8cb3031f20438f246798d5f0d98089f29 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Wed, 23 Jul 2008 21:28:17 -0700 Subject: memory hotplug: small fixes to bootmem freeing for memory hotremove - Change some naming * Magic -> types * MIX_INFO -> MIX_SECTION_INFO * Change definition of bootmem type from direct hex value - __free_pages_bootmem() becomes __meminit. Signed-off-by: Yasunori Goto Cc: Andy Whitcroft Cc: Badari Pulavarty Cc: Yinghai Lu Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 8 ++++---- mm/memory_hotplug.c | 12 ++++++------ mm/page_alloc.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ea9f5ad9ec8..3628e5088f6 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -13,12 +13,12 @@ struct mem_section; #ifdef CONFIG_MEMORY_HOTPLUG /* - * Magic number for free bootmem. + * Types for free bootmem. * The normal smallest mapcount is -1. Here is smaller value than it. */ -#define SECTION_INFO 0xfffffffe -#define MIX_INFO 0xfffffffd -#define NODE_INFO 0xfffffffc +#define SECTION_INFO (-1 - 1) +#define MIX_SECTION_INFO (-1 - 2) +#define NODE_INFO (-1 - 3) /* * pgdat resizing functions diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ec85c37dcfb..0fb05b258f0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -62,9 +62,9 @@ static void release_memory_resource(struct resource *res) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE #ifndef CONFIG_SPARSEMEM_VMEMMAP -static void get_page_bootmem(unsigned long info, struct page *page, int magic) +static void get_page_bootmem(unsigned long info, struct page *page, int type) { - atomic_set(&page->_mapcount, magic); + atomic_set(&page->_mapcount, type); SetPagePrivate(page); set_page_private(page, info); atomic_inc(&page->_count); @@ -72,10 +72,10 @@ static void get_page_bootmem(unsigned long info, struct page *page, int magic) void put_page_bootmem(struct page *page) { - int magic; + int type; - magic = atomic_read(&page->_mapcount); - BUG_ON(magic >= -1); + type = atomic_read(&page->_mapcount); + BUG_ON(type >= -1); if (atomic_dec_return(&page->_count) == 1) { ClearPagePrivate(page); @@ -119,7 +119,7 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; for (i = 0; i < mapsize; i++, page++) - get_page_bootmem(section_nr, page, MIX_INFO); + get_page_bootmem(section_nr, page, MIX_SECTION_INFO); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cd4c41432ef..6da667274df 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -533,7 +533,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) /* * permit the bootmem allocator to evade page validation on high-order frees */ -void __free_pages_bootmem(struct page *page, unsigned int order) +void __meminit __free_pages_bootmem(struct page *page, unsigned int order) { if (order == 0) { __ClearPageReserved(page); -- cgit v1.2.3-70-g09d2 From 2f7f24eca31c4fc2fdb134b2ef743ccd67cfb9a9 Mon Sep 17 00:00:00 2001 From: Kent Liu Date: Wed, 23 Jul 2008 21:28:18 -0700 Subject: memory-hotplug: don't calculate vm_total_pages twice when rebuilding zonelists in online_pages() If zonelist is required to be rebuilt in online_pages(), there is no need to recalculate vm_total_pages in that function, as it has been updated in the call build_all_zonelists(). Signed-off-by: Kent Liu Acked-by: KAMEZAWA Hiroyuki Cc: Yasunori Goto Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0fb05b258f0..93aba78dc8b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -429,7 +429,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) if (need_zonelists_rebuild) build_all_zonelists(); - vm_total_pages = nr_free_pagecache_pages(); + else + vm_total_pages = nr_free_pagecache_pages(); + writeback_set_ratelimit(); if (onlined_pages) -- cgit v1.2.3-70-g09d2 From 5c755e9fd813810680abd56ec09a5f90143e815b Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Wed, 23 Jul 2008 21:28:19 -0700 Subject: memory-hotplug: add sysfs removable attribute for hotplug memory remove Memory may be hot-removed on a per-memory-block basis, particularly on POWER where the SPARSEMEM section size often matches the memory-block size. A user-level agent must be able to identify which sections of memory are likely to be removable before attempting the potentially expensive operation. This patch adds a file called "removable" to the memory directory in sysfs to help such an agent. In this patch, a memory block is considered removable if; o It contains only MOVABLE pageblocks o It contains only pageblocks with free pages regardless of pageblock type On the other hand, a memory block starting with a PageReserved() page will never be considered removable. Without this patch, the user-agent is forced to choose a memory block to remove randomly. Sample output of the sysfs files: ./memory/memory0/removable: 0 ./memory/memory1/removable: 0 ./memory/memory2/removable: 0 ./memory/memory3/removable: 0 ./memory/memory4/removable: 0 ./memory/memory5/removable: 0 ./memory/memory6/removable: 0 ./memory/memory7/removable: 1 ./memory/memory8/removable: 0 ./memory/memory9/removable: 0 ./memory/memory10/removable: 0 ./memory/memory11/removable: 0 ./memory/memory12/removable: 0 ./memory/memory13/removable: 0 ./memory/memory14/removable: 0 ./memory/memory15/removable: 0 ./memory/memory16/removable: 0 ./memory/memory17/removable: 1 ./memory/memory18/removable: 1 ./memory/memory19/removable: 1 ./memory/memory20/removable: 1 ./memory/memory21/removable: 1 ./memory/memory22/removable: 1 Signed-off-by: Badari Pulavarty Signed-off-by: Mel Gorman Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-devices-memory | 24 +++++++++++ drivers/base/memory.c | 19 ++++++++ include/linux/memory_hotplug.h | 12 ++++++ mm/memory_hotplug.c | 60 ++++++++++++++++++++++++++ 4 files changed, 115 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-devices-memory (limited to 'mm/memory_hotplug.c') diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory new file mode 100644 index 00000000000..7a16fe1e227 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -0,0 +1,24 @@ +What: /sys/devices/system/memory +Date: June 2008 +Contact: Badari Pulavarty +Description: + The /sys/devices/system/memory contains a snapshot of the + internal state of the kernel memory blocks. Files could be + added or removed dynamically to represent hot-add/remove + operations. + +Users: hotplug memory add/remove tools + https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/memory/memoryX/removable +Date: June 2008 +Contact: Badari Pulavarty +Description: + The file /sys/devices/system/memory/memoryX/removable + indicates whether this memory block is removable or not. + This is useful for a user-level agent to determine + identify removable sections of the memory before attempting + potentially expensive hot-remove memory operation + +Users: hotplug memory remove tools + https://w3.opensource.ibm.com/projects/powerpc-utils/ diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 4d4e0e7b6e9..855ed1a9f97 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -100,6 +100,21 @@ static ssize_t show_mem_phys_index(struct sys_device *dev, return sprintf(buf, "%08lx\n", mem->phys_index); } +/* + * Show whether the section of memory is likely to be hot-removable + */ +static ssize_t show_mem_removable(struct sys_device *dev, char *buf) +{ + unsigned long start_pfn; + int ret; + struct memory_block *mem = + container_of(dev, struct memory_block, sysdev); + + start_pfn = section_nr_to_pfn(mem->phys_index); + ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION); + return sprintf(buf, "%d\n", ret); +} + /* * online, offline, going offline, etc. */ @@ -262,6 +277,7 @@ static ssize_t show_phys_device(struct sys_device *dev, static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); +static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); #define mem_create_simple_file(mem, attr_name) \ sysdev_create_file(&mem->sysdev, &attr_##attr_name) @@ -350,6 +366,8 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, ret = mem_create_simple_file(mem, state); if (!ret) ret = mem_create_simple_file(mem, phys_device); + if (!ret) + ret = mem_create_simple_file(mem, removable); return ret; } @@ -394,6 +412,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, mem_remove_simple_file(mem, phys_index); mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); + mem_remove_simple_file(mem, removable); unregister_memory(mem, section); return 0; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3628e5088f6..763ba81fc0f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -199,6 +199,18 @@ extern int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); +#ifdef CONFIG_MEMORY_HOTREMOVE + +extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); + +#else +static inline int is_mem_section_removable(unsigned long pfn, + unsigned long nr_pages) +{ + return 0; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ + extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); extern int remove_memory(u64 start, u64 size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 93aba78dc8b..89fee2dcb03 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -522,6 +522,66 @@ error: EXPORT_SYMBOL_GPL(add_memory); #ifdef CONFIG_MEMORY_HOTREMOVE +/* + * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy + * set and the size of the free page is given by page_order(). Using this, + * the function determines if the pageblock contains only free pages. + * Due to buddy contraints, a free page at least the size of a pageblock will + * be located at the start of the pageblock + */ +static inline int pageblock_free(struct page *page) +{ + return PageBuddy(page) && page_order(page) >= pageblock_order; +} + +/* Return the start of the next active pageblock after a given page */ +static struct page *next_active_pageblock(struct page *page) +{ + int pageblocks_stride; + + /* Ensure the starting page is pageblock-aligned */ + BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); + + /* Move forward by at least 1 * pageblock_nr_pages */ + pageblocks_stride = 1; + + /* If the entire pageblock is free, move to the end of free page */ + if (pageblock_free(page)) + pageblocks_stride += page_order(page) - pageblock_order; + + return page + (pageblocks_stride * pageblock_nr_pages); +} + +/* Checks if this range of memory is likely to be hot-removable. */ +int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) +{ + int type; + struct page *page = pfn_to_page(start_pfn); + struct page *end_page = page + nr_pages; + + /* Check the starting page of each pageblock within the range */ + for (; page < end_page; page = next_active_pageblock(page)) { + type = get_pageblock_migratetype(page); + + /* + * A pageblock containing MOVABLE or free pages is considered + * removable + */ + if (type != MIGRATE_MOVABLE && !pageblock_free(page)) + return 0; + + /* + * A pageblock starting with a PageReserved page is not + * considered removable. + */ + if (PageReserved(page)) + return 0; + } + + /* All pageblocks in the memory block are likely to be hot-removable */ + return 1; +} + /* * Confirm all pages in a range [start, end) is belongs to the same zone. */ -- cgit v1.2.3-70-g09d2