From 0c2c99b1b8ab5d294f176d631e945ebdefcce4cd Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 20 Jan 2011 10:43:34 -0600 Subject: memory hotplug: Allow memory blocks to span multiple memory sections Update the memory sysfs code such that each sysfs memory directory is now considered a memory block that can span multiple memory sections per memory block. The default size of each memory block is SECTION_SIZE_BITS to maintain the current behavior of having a single memory section per memory block (i.e. one sysfs directory per memory section). For architectures that want to have memory blocks span multiple memory sections they need only define their own memory_block_size_bytes() routine. Update the memory hotplug documentation to reflect the new behaviors of memory blocks reflected in sysfs. Signed-off-by: Nathan Fontenot Reviewed-by: Robin Holt Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 155 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 108 insertions(+), 47 deletions(-) (limited to 'drivers/base/memory.c') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index cafeaaf0428..0b704004258 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -30,6 +30,14 @@ static DEFINE_MUTEX(mem_sysfs_mutex); #define MEMORY_CLASS_NAME "memory" +#define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS) + +static int sections_per_block; + +static inline int base_memory_block_id(int section_nr) +{ + return section_nr / sections_per_block; +} static struct sysdev_class memory_sysdev_class = { .name = MEMORY_CLASS_NAME, @@ -84,28 +92,47 @@ EXPORT_SYMBOL(unregister_memory_isolate_notifier); * register_memory - Setup a sysfs device for a memory block */ static -int register_memory(struct memory_block *memory, struct mem_section *section) +int register_memory(struct memory_block *memory) { int error; memory->sysdev.cls = &memory_sysdev_class; - memory->sysdev.id = __section_nr(section); + 
memory->sysdev.id = memory->phys_index / sections_per_block; error = sysdev_register(&memory->sysdev); return error; } static void -unregister_memory(struct memory_block *memory, struct mem_section *section) +unregister_memory(struct memory_block *memory) { BUG_ON(memory->sysdev.cls != &memory_sysdev_class); - BUG_ON(memory->sysdev.id != __section_nr(section)); /* drop the ref. we got in remove_memory_block() */ kobject_put(&memory->sysdev.kobj); sysdev_unregister(&memory->sysdev); } +unsigned long __weak memory_block_size_bytes(void) +{ + return MIN_MEMORY_BLOCK_SIZE; +} + +static unsigned long get_memory_block_size(void) +{ + unsigned long block_sz; + + block_sz = memory_block_size_bytes(); + + /* Validate blk_sz is a power of 2 and not less than section size */ + if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) { + WARN_ON(1); + block_sz = MIN_MEMORY_BLOCK_SIZE; + } + + return block_sz; +} + /* * use this as the physical section index that this memsection * uses. 
@@ -116,7 +143,7 @@ static ssize_t show_mem_phys_index(struct sys_device *dev, { struct memory_block *mem = container_of(dev, struct memory_block, sysdev); - return sprintf(buf, "%08lx\n", mem->phys_index); + return sprintf(buf, "%08lx\n", mem->phys_index / sections_per_block); } /* @@ -125,13 +152,16 @@ static ssize_t show_mem_phys_index(struct sys_device *dev, static ssize_t show_mem_removable(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { - unsigned long start_pfn; - int ret; + unsigned long i, pfn; + int ret = 1; struct memory_block *mem = container_of(dev, struct memory_block, sysdev); - start_pfn = section_nr_to_pfn(mem->phys_index); - ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION); + for (i = 0; i < sections_per_block; i++) { + pfn = section_nr_to_pfn(mem->phys_index + i); + ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); + } + return sprintf(buf, "%d\n", ret); } @@ -184,17 +214,14 @@ int memory_isolate_notify(unsigned long val, void *v) * OK to have direct references to sparsemem variables in here. */ static int -memory_block_action(struct memory_block *mem, unsigned long action) +memory_section_action(unsigned long phys_index, unsigned long action) { int i; - unsigned long psection; unsigned long start_pfn, start_paddr; struct page *first_page; int ret; - int old_state = mem->state; - psection = mem->phys_index; - first_page = pfn_to_page(psection << PFN_SECTION_SHIFT); + first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT); /* * The probe routines leave the pages reserved, just @@ -207,8 +234,8 @@ memory_block_action(struct memory_block *mem, unsigned long action) continue; printk(KERN_WARNING "section number %ld page number %d " - "not reserved, was it already online? 
\n", - psection, i); + "not reserved, was it already online?\n", + phys_index, i); return -EBUSY; } } @@ -219,18 +246,13 @@ memory_block_action(struct memory_block *mem, unsigned long action) ret = online_pages(start_pfn, PAGES_PER_SECTION); break; case MEM_OFFLINE: - mem->state = MEM_GOING_OFFLINE; start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; ret = remove_memory(start_paddr, PAGES_PER_SECTION << PAGE_SHIFT); - if (ret) { - mem->state = old_state; - break; - } break; default: - WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", - __func__, mem, action, action); + WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " + "%ld\n", __func__, phys_index, action, action); ret = -EINVAL; } @@ -240,7 +262,8 @@ memory_block_action(struct memory_block *mem, unsigned long action) static int memory_block_change_state(struct memory_block *mem, unsigned long to_state, unsigned long from_state_req) { - int ret = 0; + int i, ret = 0; + mutex_lock(&mem->state_mutex); if (mem->state != from_state_req) { @@ -248,8 +271,22 @@ static int memory_block_change_state(struct memory_block *mem, goto out; } - ret = memory_block_action(mem, to_state); - if (!ret) + if (to_state == MEM_OFFLINE) + mem->state = MEM_GOING_OFFLINE; + + for (i = 0; i < sections_per_block; i++) { + ret = memory_section_action(mem->phys_index + i, to_state); + if (ret) + break; + } + + if (ret) { + for (i = 0; i < sections_per_block; i++) + memory_section_action(mem->phys_index + i, + from_state_req); + + mem->state = from_state_req; + } else mem->state = to_state; out: @@ -262,20 +299,15 @@ store_mem_state(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { struct memory_block *mem; - unsigned int phys_section_nr; int ret = -EINVAL; mem = container_of(dev, struct memory_block, sysdev); - phys_section_nr = mem->phys_index; - - if (!present_section_nr(phys_section_nr)) - goto out; if (!strncmp(buf, "online", min((int)count, 6))) ret = memory_block_change_state(mem, 
MEM_ONLINE, MEM_OFFLINE); else if(!strncmp(buf, "offline", min((int)count, 7))) ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); -out: + if (ret) return ret; return count; @@ -315,7 +347,7 @@ static ssize_t print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr, char *buf) { - return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); + return sprintf(buf, "%lx\n", get_memory_block_size()); } static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); @@ -444,6 +476,7 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section, struct sys_device *sysdev; struct memory_block *mem; char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; + int block_id = base_memory_block_id(__section_nr(section)); kobj = hint ? &hint->sysdev.kobj : NULL; @@ -451,7 +484,7 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section, * This only works because we know that section == sysdev->id * slightly redundant with sysdev_register() */ - sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section)); + sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id); kobj = kset_find_obj_hinted(&memory_sysdev_class.kset, name, kobj); if (!kobj) @@ -476,26 +509,27 @@ struct memory_block *find_memory_block(struct mem_section *section) return find_memory_block_hinted(section, NULL); } -static int add_memory_block(int nid, struct mem_section *section, - unsigned long state, enum mem_add_context context) +static int init_memory_block(struct memory_block **memory, + struct mem_section *section, unsigned long state) { - struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); + struct memory_block *mem; unsigned long start_pfn; + int scn_nr; int ret = 0; + mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM; - mutex_lock(&mem_sysfs_mutex); - - mem->phys_index = __section_nr(section); + scn_nr = __section_nr(section); + mem->phys_index = base_memory_block_id(scn_nr) * 
sections_per_block; mem->state = state; mem->section_count++; mutex_init(&mem->state_mutex); start_pfn = section_nr_to_pfn(mem->phys_index); mem->phys_device = arch_get_memory_phys_device(start_pfn); - ret = register_memory(mem, section); + ret = register_memory(mem); if (!ret) ret = mem_create_simple_file(mem, phys_index); if (!ret) @@ -504,8 +538,29 @@ static int add_memory_block(int nid, struct mem_section *section, ret = mem_create_simple_file(mem, phys_device); if (!ret) ret = mem_create_simple_file(mem, removable); + + *memory = mem; + return ret; +} + +static int add_memory_section(int nid, struct mem_section *section, + unsigned long state, enum mem_add_context context) +{ + struct memory_block *mem; + int ret = 0; + + mutex_lock(&mem_sysfs_mutex); + + mem = find_memory_block(section); + if (mem) { + mem->section_count++; + kobject_put(&mem->sysdev.kobj); + } else + ret = init_memory_block(&mem, section, state); + if (!ret) { - if (context == HOTPLUG) + if (context == HOTPLUG && + mem->section_count == sections_per_block) ret = register_mem_sect_under_node(mem, nid); } @@ -528,8 +583,10 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); mem_remove_simple_file(mem, removable); - unregister_memory(mem, section); - } + unregister_memory(mem); + kfree(mem); + } else + kobject_put(&mem->sysdev.kobj); mutex_unlock(&mem_sysfs_mutex); return 0; @@ -541,7 +598,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, */ int register_new_memory(int nid, struct mem_section *section) { - return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG); + return add_memory_section(nid, section, MEM_OFFLINE, HOTPLUG); } int unregister_memory_section(struct mem_section *section) @@ -560,12 +617,16 @@ int __init memory_dev_init(void) unsigned int i; int ret; int err; + unsigned long block_sz; memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; 
ret = sysdev_class_register(&memory_sysdev_class); if (ret) goto out; + block_sz = get_memory_block_size(); + sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + /* * Create entries for memory sections that were found * during boot and have been initialized @@ -573,8 +634,8 @@ int __init memory_dev_init(void) for (i = 0; i < NR_MEM_SECTIONS; i++) { if (!present_section_nr(i)) continue; - err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, - BOOT); + err = add_memory_section(0, __nr_to_section(i), MEM_ONLINE, + BOOT); if (!ret) ret = err; } -- cgit v1.2.3-70-g09d2 From d33601644cd3b09afb2edd9474517edc441c8fad Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Thu, 20 Jan 2011 10:44:29 -0600 Subject: memory hotplug: Update phys_index to [start|end]_section_nr Update the 'phys_index' property of the memory_block struct to be called start_section_nr, and add an end_section_nr property. The data tracked here is the same but the updated naming is more in line with what is stored here, namely the first and last section number that the memory block spans. The names presented to userspace remain the same, phys_index for start_section_nr and end_phys_index for end_section_nr, to avoid breaking anything in userspace. This also updates the node sysfs code to be aware of the new capability for a memory block to contain multiple memory sections and be aware of the memory block structure name changes (start_section_nr). This requires an additional parameter to unregister_mem_sect_under_nodes so that we know which memory section of the memory block to unregister. 
Signed-off-by: Nathan Fontenot Reviewed-by: Robin Holt Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 41 +++++++++++++++++++++++++++++++---------- drivers/base/node.c | 12 ++++++++---- include/linux/memory.h | 3 ++- include/linux/node.h | 6 ++++-- 4 files changed, 45 insertions(+), 17 deletions(-) (limited to 'drivers/base/memory.c') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 0b704004258..71b4a32b171 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -97,7 +97,7 @@ int register_memory(struct memory_block *memory) int error; memory->sysdev.cls = &memory_sysdev_class; - memory->sysdev.id = memory->phys_index / sections_per_block; + memory->sysdev.id = memory->start_section_nr / sections_per_block; error = sysdev_register(&memory->sysdev); return error; @@ -138,12 +138,26 @@ static unsigned long get_memory_block_size(void) * uses. */ -static ssize_t show_mem_phys_index(struct sys_device *dev, +static ssize_t show_mem_start_phys_index(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { struct memory_block *mem = container_of(dev, struct memory_block, sysdev); - return sprintf(buf, "%08lx\n", mem->phys_index / sections_per_block); + unsigned long phys_index; + + phys_index = mem->start_section_nr / sections_per_block; + return sprintf(buf, "%08lx\n", phys_index); +} + +static ssize_t show_mem_end_phys_index(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) +{ + struct memory_block *mem = + container_of(dev, struct memory_block, sysdev); + unsigned long phys_index; + + phys_index = mem->end_section_nr / sections_per_block; + return sprintf(buf, "%08lx\n", phys_index); } /* @@ -158,7 +172,7 @@ static ssize_t show_mem_removable(struct sys_device *dev, container_of(dev, struct memory_block, sysdev); for (i = 0; i < sections_per_block; i++) { - pfn = section_nr_to_pfn(mem->phys_index + i); + pfn = section_nr_to_pfn(mem->start_section_nr + i); ret &= 
is_mem_section_removable(pfn, PAGES_PER_SECTION); } @@ -275,14 +289,15 @@ static int memory_block_change_state(struct memory_block *mem, mem->state = MEM_GOING_OFFLINE; for (i = 0; i < sections_per_block; i++) { - ret = memory_section_action(mem->phys_index + i, to_state); + ret = memory_section_action(mem->start_section_nr + i, + to_state); if (ret) break; } if (ret) { for (i = 0; i < sections_per_block; i++) - memory_section_action(mem->phys_index + i, + memory_section_action(mem->start_section_nr + i, from_state_req); mem->state = from_state_req; @@ -330,7 +345,8 @@ static ssize_t show_phys_device(struct sys_device *dev, return sprintf(buf, "%d\n", mem->phys_device); } -static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); +static SYSDEV_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL); +static SYSDEV_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL); static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); @@ -522,16 +538,20 @@ static int init_memory_block(struct memory_block **memory, return -ENOMEM; scn_nr = __section_nr(section); - mem->phys_index = base_memory_block_id(scn_nr) * sections_per_block; + mem->start_section_nr = + base_memory_block_id(scn_nr) * sections_per_block; + mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; mem->state = state; mem->section_count++; mutex_init(&mem->state_mutex); - start_pfn = section_nr_to_pfn(mem->phys_index); + start_pfn = section_nr_to_pfn(mem->start_section_nr); mem->phys_device = arch_get_memory_phys_device(start_pfn); ret = register_memory(mem); if (!ret) ret = mem_create_simple_file(mem, phys_index); + if (!ret) + ret = mem_create_simple_file(mem, end_phys_index); if (!ret) ret = mem_create_simple_file(mem, state); if (!ret) @@ -575,11 +595,12 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, 
mutex_lock(&mem_sysfs_mutex); mem = find_memory_block(section); + unregister_mem_sect_under_nodes(mem, __section_nr(section)); mem->section_count--; if (mem->section_count == 0) { - unregister_mem_sect_under_nodes(mem); mem_remove_simple_file(mem, phys_index); + mem_remove_simple_file(mem, end_phys_index); mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); mem_remove_simple_file(mem, removable); diff --git a/drivers/base/node.c b/drivers/base/node.c index 36b43052001..b3b72d64e80 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -375,8 +375,10 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) return -EFAULT; if (!node_online(nid)) return 0; - sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); - sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + + sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); + sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); + sect_end_pfn += PAGES_PER_SECTION - 1; for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int page_nid; @@ -400,7 +402,8 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) } /* unregister memory section under all nodes that it spans */ -int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) +int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, + unsigned long phys_index) { NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; @@ -412,7 +415,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) if (!unlinked_nodes) return -ENOMEM; nodes_clear(*unlinked_nodes); - sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + + sect_start_pfn = section_nr_to_pfn(phys_index); sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int nid; diff --git a/include/linux/memory.h b/include/linux/memory.h index 06c1fa0a5c7..e1e3b2b84f8 100644 --- 
a/include/linux/memory.h +++ b/include/linux/memory.h @@ -21,7 +21,8 @@ #include struct memory_block { - unsigned long phys_index; + unsigned long start_section_nr; + unsigned long end_section_nr; unsigned long state; int section_count; diff --git a/include/linux/node.h b/include/linux/node.h index 1466945cc9e..92370e22343 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -39,7 +39,8 @@ extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern int register_mem_sect_under_node(struct memory_block *mem_blk, int nid); -extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk); +extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, + unsigned long phys_index); #ifdef CONFIG_HUGETLBFS extern void register_hugetlbfs_with_node(node_registration_func_t doregister, @@ -67,7 +68,8 @@ static inline int register_mem_sect_under_node(struct memory_block *mem_blk, { return 0; } -static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) +static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, + unsigned long phys_index) { return 0; } -- cgit v1.2.3-70-g09d2 From 6add7cd618b4d4dc525731beb539c5e06e891855 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 31 Jan 2011 10:55:23 -0600 Subject: memory hotplug: sysfs probe routine should add all memory sections As a follow-on to the recent patches I submitted that allowed for a sysfs memory block to span multiple memory sections, we should also update the probe routine to online all of the memory sections in a memory block. Without this patch the current code will only add a single memory section. I think the probe routine should add all of the memory sections in the specified memory block so that its behavior is in line with memory hotplug actions through the sysfs interfaces. 
This patch applies on top of the previous sysfs memory updates to allow a sysfs directory to span multiple memory sections. https://lkml.org/lkml/2011/1/20/245 Signed-off-by: Nathan Fontenot Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/base/memory.c') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 71b4a32b171..3da6a43b775 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -387,12 +387,19 @@ memory_probe_store(struct class *class, struct class_attribute *attr, { u64 phys_addr; int nid; - int ret; + int i, ret; phys_addr = simple_strtoull(buf, NULL, 0); - nid = memory_add_physaddr_to_nid(phys_addr); - ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); + for (i = 0; i < sections_per_block; i++) { + nid = memory_add_physaddr_to_nid(phys_addr); + ret = add_memory(nid, phys_addr, + PAGES_PER_SECTION << PAGE_SHIFT); + if (ret) + break; + + phys_addr += MIN_MEMORY_BLOCK_SIZE; + } if (ret) count = ret; -- cgit v1.2.3-70-g09d2