From 89689ae7f95995723fbcd5c116c47933a3bb8b13 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:31:45 -0800 Subject: [PATCH] Get rid of zone_table[] The zone table is mostly not needed. If we have a node in the page flags then we can get to the zone via NODE_DATA() which is much more likely to be already in the cpu cache. In case of SMP and UP NODE_DATA() is a constant pointer which allows us to access an exact replica of zonetable in the node_zones field. In all of the above cases there will be no need at all for the zone table. The only remaining case is if in a NUMA system the node numbers do not fit into the page flags. In that case we make sparse generate a table that maps sections to nodes and use that table to to figure out the node number. This table is sized to fit in a single cache line for the known 32 bit NUMA platform which makes it very likely that the information can be obtained without a cache miss. For sparsemem the zone table seems to be have been fairly large based on the maximum possible number of sections and the number of zones per node. There is some memory saving by removing zone_table. The main benefit is to reduce the cache foootprint of the VM from the frequent lookups of zones. Plus it simplifies the page allocator. [akpm@osdl.org: build fix] Signed-off-by: Christoph Lameter Cc: Dave Hansen Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index b3c82ba3001..158d6a2a526 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -24,6 +24,25 @@ struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] #endif EXPORT_SYMBOL(mem_section); +#ifdef NODE_NOT_IN_PAGE_FLAGS +/* + * If we did not store the node number in the page then we have to + * do a lookup in the section_to_node_table in order to find which + * node the page belongs to. + */ +#if MAX_NUMNODES <= 256 +static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; +#else +static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; +#endif + +unsigned long page_to_nid(struct page *page) +{ + return section_to_node_table[page_to_section(page)]; +} +EXPORT_SYMBOL(page_to_nid); +#endif + #ifdef CONFIG_SPARSEMEM_EXTREME static struct mem_section *sparse_index_alloc(int nid) { @@ -49,6 +68,10 @@ static int sparse_index_init(unsigned long section_nr, int nid) struct mem_section *section; int ret = 0; +#ifdef NODE_NOT_IN_PAGE_FLAGS + section_to_node_table[section_nr] = nid; +#endif + if (mem_section[root]) return -EEXIST; -- cgit v1.2.3-70-g09d2 From 25ba77c141dbcd2602dd0171824d0d72aa023a01 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 6 Dec 2006 20:33:03 -0800 Subject: [PATCH] numa node ids are int, page_to_nid and zone_to_nid should return int NUMA node ids are passed as either int or unsigned int almost exclusivly page_to_nid and zone_to_nid both return unsigned long. This is a throw back to when page_to_nid was a #define and was thus exposing the real type of the page flags field. In addition to fixing up the definitions of page_to_nid and zone_to_nid I audited the users of these functions identifying the following incorrect uses: 1) mm/page_alloc.c show_node() -- printk dumping the node id, 2) include/asm-ia64/pgalloc.h pgtable_quicklist_free() -- comparison against numa_node_id() which returns an int from cpu_to_node(), and 3) mm/mpolicy.c check_pte_range -- used as an index in node_isset which uses bit_set which in generic code takes an int. Signed-off-by: Andy Whitcroft Cc: Christoph Lameter Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-ia64/pgalloc.h | 2 +- include/linux/mm.h | 6 +++--- mm/mempolicy.c | 2 +- mm/page_alloc.c | 2 +- mm/sparse.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/sparse.c') diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 9cb68e9b377..393e04c42a2 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -60,7 +60,7 @@ static inline void *pgtable_quicklist_alloc(void) static inline void pgtable_quicklist_free(void *pgtable_entry) { #ifdef CONFIG_NUMA - unsigned long nid = page_to_nid(virt_to_page(pgtable_entry)); + int nid = page_to_nid(virt_to_page(pgtable_entry)); if (unlikely(nid != numa_node_id())) { free_page((unsigned long)pgtable_entry); diff --git a/include/linux/mm.h b/include/linux/mm.h index 840303769c1..0e266fe1b4c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -456,7 +456,7 @@ static inline int page_zone_id(struct page *page) return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; } -static inline unsigned long zone_to_nid(struct zone *zone) +static inline int zone_to_nid(struct zone *zone) { #ifdef CONFIG_NUMA return zone->node; @@ -466,9 +466,9 @@ static inline unsigned long zone_to_nid(struct zone *zone) } #ifdef NODE_NOT_IN_PAGE_FLAGS -extern unsigned long page_to_nid(struct page *page); +extern int page_to_nid(struct page *page); #else -static inline unsigned long page_to_nid(struct page *page) +static inline int page_to_nid(struct page *page) { return (page->flags >> NODES_PGSHIFT) & NODES_MASK; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index fb907236bbd..e7b69c90cfd 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -221,7 +221,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); do { struct page *page; - unsigned int nid; + int nid; if (!pte_present(*pte)) continue; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 86f2984f8b7..614d427854a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1407,7 +1407,7 @@ unsigned int nr_free_pagecache_pages(void) static inline void show_node(struct zone *zone) { if (NUMA_BUILD) - printk("Node %ld ", zone_to_nid(zone)); + printk("Node %d ", zone_to_nid(zone)); } void si_meminfo(struct sysinfo *val) diff --git a/mm/sparse.c b/mm/sparse.c index 158d6a2a526..ac26eb0d73c 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -36,7 +36,7 @@ static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #endif -unsigned long page_to_nid(struct page *page) +int page_to_nid(struct page *page) { return section_to_node_table[page_to_section(page)]; } -- cgit v1.2.3-70-g09d2