From 9a30523066cde73c1442b76224bb540de9f9b0b0 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:25 -0800 Subject: hugetlb: add per node hstate attributes Add the per huge page size control/query attributes to the per node sysdevs: /sys/devices/system/node/node/hugepages/hugepages-/ nr_hugepages - r/w free_huge_pages - r/o surplus_huge_pages - r/o The patch attempts to re-use/share as much of the existing global hstate attribute initialization and handling, and the "nodes_allowed" constraint processing as possible. Calling set_max_huge_pages() with no node indicates a change to global hstate parameters. In this case, any non-default task mempolicy will be used to generate the nodes_allowed mask. A valid node id indicates an update to that node's hstate parameters, and the count argument specifies the target count for the specified node. From this info, we compute the target global count for the hstate and construct a nodes_allowed node mask contain only the specified node. Setting the node specific nr_hugepages via the per node attribute effectively ignores any task mempolicy or cpuset constraints. With this patch: (me):ls /sys/devices/system/node/node0/hugepages/hugepages-2048kB ./ ../ free_hugepages nr_hugepages surplus_hugepages Starting from: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 0 Node 2 HugePages_Free: 0 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 0 Allocate 16 persistent huge pages on node 2: (me):echo 16 >/sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages [Note that this is equivalent to: numactl -m 2 hugeadmin --pool-pages-min 2M:+16 ] Yields: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 16 Node 2 HugePages_Free: 16 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 16 Global controls work as expected--reduce pool to 8 persistent huge pages: (me):echo 8 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 8 Node 2 HugePages_Free: 8 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 Signed-off-by: Lee Schermerhorn Acked-by: Mel Gorman Reviewed-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 1fe5536d404..f502711d28d 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -173,6 +173,43 @@ static ssize_t node_read_distance(struct sys_device * dev, } static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); +#ifdef CONFIG_HUGETLBFS +/* + * hugetlbfs per node attributes registration interface: + * When/if hugetlb[fs] subsystem initializes [sometime after this module], + * it will register its per node attributes for all nodes online at that + * time. It will also call register_hugetlbfs_with_node(), below, to + * register its attribute registration functions with this node driver. + * Once these hooks have been initialized, the node driver will call into + * the hugetlb module to [un]register attributes for hot-plugged nodes. + */ +static node_registration_func_t __hugetlb_register_node; +static node_registration_func_t __hugetlb_unregister_node; + +static inline void hugetlb_register_node(struct node *node) +{ + if (__hugetlb_register_node) + __hugetlb_register_node(node); +} + +static inline void hugetlb_unregister_node(struct node *node) +{ + if (__hugetlb_unregister_node) + __hugetlb_unregister_node(node); +} + +void register_hugetlbfs_with_node(node_registration_func_t doregister, + node_registration_func_t unregister) +{ + __hugetlb_register_node = doregister; + __hugetlb_unregister_node = unregister; +} +#else +static inline void hugetlb_register_node(struct node *node) {} + +static inline void hugetlb_unregister_node(struct node *node) {} +#endif + /* * register_node - Setup a sysfs device for a node. @@ -196,6 +233,7 @@ int register_node(struct node *node, int num, struct node *parent) sysdev_create_file(&node->sysdev, &attr_distance); scan_unevictable_register_node(node); + hugetlb_register_node(node); } return error; } @@ -216,6 +254,7 @@ void unregister_node(struct node *node) sysdev_remove_file(&node->sysdev, &attr_distance); scan_unevictable_unregister_node(node); + hugetlb_unregister_node(node); sysdev_unregister(&node->sysdev); } -- cgit v1.2.3-70-g09d2 From 4faf8d950ec438c49ae4526b897c30f8a2cad741 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:35 -0800 Subject: hugetlb: handle memory hot-plug events Register per node hstate attributes only for nodes with memory. As suggested by David Rientjes. With Memory Hotplug, memory can be added to a memoryless node and a node with memory can become memoryless. Therefore, add a memory on/off-line notifier callback to [un]register a node's attributes on transition to/from memoryless state. N.B., Only tested build, boot, libhugetlbfs regression. i.e., no memory hotplug testing. Signed-off-by: Lee Schermerhorn Reviewed-by: Andi Kleen Acked-by: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Mel Gorman Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/hugetlbpage.txt | 3 ++- drivers/base/node.c | 53 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 6 deletions(-) (limited to 'drivers/base/node.c') diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt index 6a8e4667ab3..bc31636973e 100644 --- a/Documentation/vm/hugetlbpage.txt +++ b/Documentation/vm/hugetlbpage.txt @@ -231,7 +231,8 @@ resulting effect on persistent huge page allocation is as follows: Per Node Hugepages Attributes A subset of the contents of the root huge page control directory in sysfs, -described above, has been replicated under each "node" system device in: +described above, will be replicated under each the system device of each +NUMA node with memory in: /sys/devices/system/node/node[0-9]*/hugepages/ diff --git a/drivers/base/node.c b/drivers/base/node.c index f502711d28d..9e218a6d4a5 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -177,8 +177,8 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); /* * hugetlbfs per node attributes registration interface: * When/if hugetlb[fs] subsystem initializes [sometime after this module], - * it will register its per node attributes for all nodes online at that - * time. It will also call register_hugetlbfs_with_node(), below, to + * it will register its per node attributes for all online nodes with + * memory. It will also call register_hugetlbfs_with_node(), below, to * register its attribute registration functions with this node driver. * Once these hooks have been initialized, the node driver will call into * the hugetlb module to [un]register attributes for hot-plugged nodes. @@ -188,7 +188,8 @@ static node_registration_func_t __hugetlb_unregister_node; static inline void hugetlb_register_node(struct node *node) { - if (__hugetlb_register_node) + if (__hugetlb_register_node && + node_state(node->sysdev.id, N_HIGH_MEMORY)) __hugetlb_register_node(node); } @@ -233,6 +234,7 @@ int register_node(struct node *node, int num, struct node *parent) sysdev_create_file(&node->sysdev, &attr_distance); scan_unevictable_register_node(node); + hugetlb_register_node(node); } return error; @@ -254,7 +256,7 @@ void unregister_node(struct node *node) sysdev_remove_file(&node->sysdev, &attr_distance); scan_unevictable_unregister_node(node); - hugetlb_unregister_node(node); + hugetlb_unregister_node(node); /* no-op, if memoryless node */ sysdev_unregister(&node->sysdev); } @@ -384,8 +386,45 @@ static int link_mem_sections(int nid) } return err; } + +/* + * Handle per node hstate attribute [un]registration on transistions + * to/from memoryless state. + */ + +static int node_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + struct memory_notify *mnb = arg; + int nid = mnb->status_change_nid; + + switch (action) { + case MEM_ONLINE: /* memory successfully brought online */ + if (nid != NUMA_NO_NODE) + hugetlb_register_node(&node_devices[nid]); + break; + case MEM_OFFLINE: /* or offline */ + if (nid != NUMA_NO_NODE) + hugetlb_unregister_node(&node_devices[nid]); + break; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + default: + break; + } + + return NOTIFY_OK; +} #else static int link_mem_sections(int nid) { return 0; } + +static inline int node_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + return NOTIFY_OK; +} #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ int register_one_node(int nid) @@ -499,13 +538,17 @@ static int node_states_init(void) return err; } +#define NODE_CALLBACK_PRI 2 /* lower than SLAB */ static int __init register_node_type(void) { int ret; ret = sysdev_class_register(&node_class); - if (!ret) + if (!ret) { ret = node_states_init(); + hotplug_memory_notifier(node_memory_callback, + NODE_CALLBACK_PRI); + } /* * Note: we're not going to unregister the node class if we fail -- cgit v1.2.3-70-g09d2 From 39da08cb074cf19cb249832a2a955dfb28837e65 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:36 -0800 Subject: hugetlb: offload per node attribute registrations Offload the registration and unregistration of per node hstate sysfs attributes to a worker thread rather than attempt the allocation/attachment or detachment/freeing of the attributes in the context of the memory hotplug handler. I don't know that this is absolutely required, but the registration can sleep in allocations and other mem hot plug handlers do it this way. If it turns out this is NOT required, we can drop this patch. N.B., Only tested build, boot, libhugetlbfs regression. i.e., no memory hotplug testing. Signed-off-by: Lee Schermerhorn Reviewed-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Mel Gorman Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 57 +++++++++++++++++++++++++++++++++++++++++++--------- include/linux/node.h | 5 +++++ 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 9e218a6d4a5..54e5d8eaf70 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -186,11 +186,14 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); static node_registration_func_t __hugetlb_register_node; static node_registration_func_t __hugetlb_unregister_node; -static inline void hugetlb_register_node(struct node *node) +static inline bool hugetlb_register_node(struct node *node) { if (__hugetlb_register_node && - node_state(node->sysdev.id, N_HIGH_MEMORY)) + node_state(node->sysdev.id, N_HIGH_MEMORY)) { __hugetlb_register_node(node); + return true; + } + return false; } static inline void hugetlb_unregister_node(struct node *node) @@ -387,10 +390,31 @@ static int link_mem_sections(int nid) return err; } +#ifdef CONFIG_HUGETLBFS /* * Handle per node hstate attribute [un]registration on transistions * to/from memoryless state. */ +static void node_hugetlb_work(struct work_struct *work) +{ + struct node *node = container_of(work, struct node, node_work); + + /* + * We only get here when a node transitions to/from memoryless state. + * We can detect which transition occurred by examining whether the + * node has memory now. hugetlb_register_node() already check this + * so we try to register the attributes. If that fails, then the + * node has transitioned to memoryless, try to unregister the + * attributes. + */ + if (!hugetlb_register_node(node)) + hugetlb_unregister_node(node); +} + +static void init_node_hugetlb_work(int nid) +{ + INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work); +} static int node_memory_callback(struct notifier_block *self, unsigned long action, void *arg) @@ -399,14 +423,16 @@ static int node_memory_callback(struct notifier_block *self, int nid = mnb->status_change_nid; switch (action) { - case MEM_ONLINE: /* memory successfully brought online */ + case MEM_ONLINE: + case MEM_OFFLINE: + /* + * offload per node hstate [un]registration to a work thread + * when transitioning to/from memoryless state. + */ if (nid != NUMA_NO_NODE) - hugetlb_register_node(&node_devices[nid]); - break; - case MEM_OFFLINE: /* or offline */ - if (nid != NUMA_NO_NODE) - hugetlb_unregister_node(&node_devices[nid]); + schedule_work(&node_devices[nid].node_work); break; + case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: case MEM_CANCEL_ONLINE: @@ -417,15 +443,23 @@ static int node_memory_callback(struct notifier_block *self, return NOTIFY_OK; } -#else +#endif /* CONFIG_HUGETLBFS */ +#else /* !CONFIG_MEMORY_HOTPLUG_SPARSE */ + static int link_mem_sections(int nid) { return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ +#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ + !defined(CONFIG_HUGETLBFS) static inline int node_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { return NOTIFY_OK; } -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ + +static void init_node_hugetlb_work(int nid) { } + +#endif int register_one_node(int nid) { @@ -449,6 +483,9 @@ int register_one_node(int nid) /* link memory sections under this node */ error = link_mem_sections(nid); + + /* initialize work queue for memory hot plug */ + init_node_hugetlb_work(nid); } return error; diff --git a/include/linux/node.h b/include/linux/node.h index dae1521e1f0..06292dac3ea 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -21,9 +21,14 @@ #include #include +#include struct node { struct sys_device sysdev; + +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) + struct work_struct node_work; +#endif }; struct memory_block; -- cgit v1.2.3-70-g09d2 From dee5d0d518defd0337a41f1a504428c9acc87be5 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:05 -0800 Subject: mm: add numa node symlink for memory section in sysfs Commit c04fc586c (mm: show node to memory section relationship with symlinks in sysfs) created symlinks from nodes to memory sections, e.g. /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 If you're examining the memory section though and are wondering what node it might belong to, you can find it by grovelling around in sysfs, but it's a little cumbersome. Add a reverse symlink for each memory section that points back to the node to which it belongs. Signed-off-by: Alex Chiang Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Acked-by: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-devices-memory | 14 +++++++++++++- Documentation/memory-hotplug.txt | 11 +++++++---- drivers/base/node.c | 11 ++++++++++- 3 files changed, 30 insertions(+), 6 deletions(-) (limited to 'drivers/base/node.c') diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory index 9fe91c02ee4..bf1627b02a0 100644 --- a/Documentation/ABI/testing/sysfs-devices-memory +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -60,6 +60,19 @@ Description: Users: hotplug memory remove tools https://w3.opensource.ibm.com/projects/powerpc-utils/ + +What: /sys/devices/system/memoryX/nodeY +Date: October 2009 +Contact: Linux Memory Management list +Description: + When CONFIG_NUMA is enabled, a symbolic link that + points to the corresponding NUMA node directory. + + For example, the following symbolic link is created for + memory section 9 on node0: + /sys/devices/system/memory/memory9/node0 -> ../../node/node0 + + What: /sys/devices/system/node/nodeX/memoryY Date: September 2008 Contact: Gary Hade @@ -70,4 +83,3 @@ Description: memory section directory. For example, the following symbolic link is created for memory section 9 on node0. /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 - diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index bbc8a6a3692..57e7e9cc187 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -160,12 +160,15 @@ Under each section, you can see 4 files. NOTE: These directories/files appear after physical memory hotplug phase. -If CONFIG_NUMA is enabled the -/sys/devices/system/memory/memoryXXX memory section -directories can also be accessed via symbolic links located in -the /sys/devices/system/node/node* directories. For example: +If CONFIG_NUMA is enabled the memoryXXX/ directories can also be accessed +via symbolic links located in the /sys/devices/system/node/node* directories. + +For example: /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 +A backlink will also be created: +/sys/devices/system/memory/memory9/node0 -> ../../node/node0 + -------------------------------- 4. Physical memory hot-add phase -------------------------------- diff --git a/drivers/base/node.c b/drivers/base/node.c index 54e5d8eaf70..44eed11bbdf 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -312,6 +312,7 @@ static int get_nid_for_pfn(unsigned long pfn) /* register memory section under specified node if it spans that node */ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) { + int ret; unsigned long pfn, sect_start_pfn, sect_end_pfn; if (!mem_blk) @@ -328,9 +329,15 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) continue; if (page_nid != nid) continue; - return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, + ret = sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, &mem_blk->sysdev.kobj, kobject_name(&mem_blk->sysdev.kobj)); + if (ret) + return ret; + + return sysfs_create_link_nowarn(&mem_blk->sysdev.kobj, + &node_devices[nid].sysdev.kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); } /* mem section does not span the specified node */ return 0; @@ -359,6 +366,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) continue; sysfs_remove_link(&node_devices[nid].sysdev.kobj, kobject_name(&mem_blk->sysdev.kobj)); + sysfs_remove_link(&mem_blk->sysdev.kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); } return 0; } -- cgit v1.2.3-70-g09d2 From f8246f3159dfdf97b8b40f9e03e715bafedd22fc Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:06 -0800 Subject: mm: refactor register_cpu_under_node() By returning early if the node is not online, we can unindent the interesting code by one level. No functional change. Signed-off-by: Alex Chiang Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Cc: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 44eed11bbdf..eeae035dadc 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -271,16 +271,18 @@ struct node node_devices[MAX_NUMNODES]; */ int register_cpu_under_node(unsigned int cpu, unsigned int nid) { - if (node_online(nid)) { - struct sys_device *obj = get_cpu_sysdev(cpu); - if (!obj) - return 0; - return sysfs_create_link(&node_devices[nid].sysdev.kobj, - &obj->kobj, - kobject_name(&obj->kobj)); - } + struct sys_device *obj; - return 0; + if (!node_online(nid)) + return 0; + + obj = get_cpu_sysdev(cpu); + if (!obj) + return 0; + + return sysfs_create_link(&node_devices[nid].sysdev.kobj, + &obj->kobj, + kobject_name(&obj->kobj)); } int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) -- cgit v1.2.3-70-g09d2 From b9d52dad9447d0db4b52d67d5e9e9d339b5e8302 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:07 -0800 Subject: mm: refactor unregister_cpu_under_node() By returning early if the node is not online, we can unindent the interesting code by two levels. No functional change. Signed-off-by: Alex Chiang Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Cc: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index eeae035dadc..9b9acc39a1e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -287,12 +287,18 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { - if (node_online(nid)) { - struct sys_device *obj = get_cpu_sysdev(cpu); - if (obj) - sysfs_remove_link(&node_devices[nid].sysdev.kobj, - kobject_name(&obj->kobj)); - } + struct sys_device *obj; + + if (!node_online(nid)) + return 0; + + obj = get_cpu_sysdev(cpu); + if (!obj) + return 0; + + sysfs_remove_link(&node_devices[nid].sysdev.kobj, + kobject_name(&obj->kobj)); + return 0; } -- cgit v1.2.3-70-g09d2 From 1830794ae6392ce12d36dbcc5ff52f11298ddab6 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Mon, 14 Dec 2009 17:59:08 -0800 Subject: mm: add numa node symlink for cpu devices in sysfs You can discover which CPUs belong to a NUMA node by examining /sys/devices/system/node/node#/ However, it's not convenient to go in the other direction, when looking at /sys/devices/system/cpu/cpu#/ Yes, you can muck about in sysfs, but adding these symlinks makes life a lot more convenient. Signed-off-by: Alex Chiang Acked-by: David Rientjes Cc: Gary Hade Cc: Badari Pulavarty Cc: Ingo Molnar Cc: David Rientjes Cc: Greg KH Cc: Randy Dunlap Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 9b9acc39a1e..41414113b9f 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -271,6 +271,7 @@ struct node node_devices[MAX_NUMNODES]; */ int register_cpu_under_node(unsigned int cpu, unsigned int nid) { + int ret; struct sys_device *obj; if (!node_online(nid)) @@ -280,9 +281,15 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) if (!obj) return 0; - return sysfs_create_link(&node_devices[nid].sysdev.kobj, + ret = sysfs_create_link(&node_devices[nid].sysdev.kobj, &obj->kobj, kobject_name(&obj->kobj)); + if (ret) + return ret; + + return sysfs_create_link(&obj->kobj, + &node_devices[nid].sysdev.kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); } int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) @@ -298,6 +305,8 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) sysfs_remove_link(&node_devices[nid].sysdev.kobj, kobject_name(&obj->kobj)); + sysfs_remove_link(&obj->kobj, + kobject_name(&node_devices[nid].sysdev.kobj)); return 0; } -- cgit v1.2.3-70-g09d2 From 9ae49fab239fb49de92a657c7426271e0793c4e1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 14 Dec 2009 17:59:46 -0800 Subject: mm: slab-allocate memory section nodemask for large systems Nodemasks should not be allocated on the stack for large systems (when it is larger than 256 bytes) since there is a threat of overflow. This patch causes the unregister_mem_sect_under_nodes() nodemask to be allocated on the stack for smaller systems and be allocated by slab for larger systems. GFP_KERNEL is used since remove_memory_block() can block. Cc: Gary Hade Cc: Badari Pulavarty Cc: Alex Chiang Signed-off-by: David Rientjes Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 41414113b9f..70122791683 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -363,12 +363,16 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) /* unregister memory section under all nodes that it spans */ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) { - nodemask_t unlinked_nodes; + NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; - if (!mem_blk) + if (!mem_blk) { + NODEMASK_FREE(unlinked_nodes); return -EFAULT; - nodes_clear(unlinked_nodes); + } + if (!unlinked_nodes) + return -ENOMEM; + nodes_clear(*unlinked_nodes); sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { @@ -379,13 +383,14 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) continue; if (!node_online(nid)) continue; - if (node_test_and_set(nid, unlinked_nodes)) + if (node_test_and_set(nid, *unlinked_nodes)) continue; sysfs_remove_link(&node_devices[nid].sysdev.kobj, kobject_name(&mem_blk->sysdev.kobj)); sysfs_remove_link(&mem_blk->sysdev.kobj, kobject_name(&node_devices[nid].sysdev.kobj)); } + NODEMASK_FREE(unlinked_nodes); return 0; } -- cgit v1.2.3-70-g09d2