diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-13 13:11:15 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-13 13:11:15 -0800 |
commit | f6e858a00af788bab0fd4c0b7f5cd788000edc18 (patch) | |
tree | f9403ca3671be9821dbf83e726e61dbe75fbca6b /include | |
parent | 193c0d682525987db59ac3a24531a77e4947aa95 (diff) | |
parent | 98870901cce098bbe94d90d2c41d8d1fa8d94392 (diff) |
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc VM changes from Andrew Morton:
"The rest of most-of-MM. The other MM bits await a slab merge.
This patch includes the addition of a huge zero_page. Not a
performance boost but it an save large amounts of physical memory in
some situations.
Also a bunch of Fujitsu engineers are working on memory hotplug.
Which, as it turns out, was badly broken. About half of their patches
are included here; the remainder are 3.8 material."
However, this merge disables CONFIG_MOVABLE_NODE, which was totally
broken. We don't add new features with "default y", nor do we add
Kconfig questions that are incomprehensible to most people without any
help text. Does the feature even make sense without compaction or
memory hotplug?
* akpm: (54 commits)
mm/bootmem.c: remove unused wrapper function reserve_bootmem_generic()
mm/memory.c: remove unused code from do_wp_page()
asm-generic, mm: pgtable: consolidate zero page helpers
mm/hugetlb.c: fix warning on freeing hwpoisoned hugepage
hwpoison, hugetlbfs: fix RSS-counter warning
hwpoison, hugetlbfs: fix "bad pmd" warning in unmapping hwpoisoned hugepage
mm: protect against concurrent vma expansion
memcg: do not check for mm in __mem_cgroup_count_vm_event
tmpfs: support SEEK_DATA and SEEK_HOLE (reprise)
mm: provide more accurate estimation of pages occupied by memmap
fs/buffer.c: remove redundant initialization in alloc_page_buffers()
fs/buffer.c: do not inline exported function
writeback: fix a typo in comment
mm: introduce new field "managed_pages" to struct zone
mm, oom: remove statically defined arch functions of same name
mm, oom: remove redundant sleep in pagefault oom handler
mm, oom: cleanup pagefault oom handler
memory_hotplug: allow online/offline memory to result movable node
numa: add CONFIG_MOVABLE_NODE for movable-dedicated node
mm, memcg: avoid unnecessary function call when memcg is disabled
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/pgtable.h | 26 | ||||
-rw-r--r-- | include/linux/bootmem.h | 3 | ||||
-rw-r--r-- | include/linux/cpuset.h | 2 | ||||
-rw-r--r-- | include/linux/gfp.h | 1 | ||||
-rw-r--r-- | include/linux/huge_mm.h | 18 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 9 | ||||
-rw-r--r-- | include/linux/memory.h | 1 | ||||
-rw-r--r-- | include/linux/mmzone.h | 41 | ||||
-rw-r--r-- | include/linux/nodemask.h | 5 | ||||
-rw-r--r-- | include/linux/res_counter.h | 5 | ||||
-rw-r--r-- | include/linux/vm_event_item.h | 2 |
11 files changed, 92 insertions, 21 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b36ce40bd1c..284e80831d2 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -449,6 +449,32 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size); #endif +#ifdef __HAVE_COLOR_ZERO_PAGE +static inline int is_zero_pfn(unsigned long pfn) +{ + extern unsigned long zero_pfn; + unsigned long offset_from_zero_pfn = pfn - zero_pfn; + return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); +} + +static inline unsigned long my_zero_pfn(unsigned long addr) +{ + return page_to_pfn(ZERO_PAGE(addr)); +} +#else +static inline int is_zero_pfn(unsigned long pfn) +{ + extern unsigned long zero_pfn; + return pfn == zero_pfn; +} + +static inline unsigned long my_zero_pfn(unsigned long addr) +{ + extern unsigned long zero_pfn; + return zero_pfn; +} +#endif + #ifdef CONFIG_MMU #ifndef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 7b74452c531..3f778c27f82 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -137,9 +137,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, - int flags); - #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP extern void *alloc_remap(int nid, unsigned long size); #else diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 838320fc3d1..8c8a60d2940 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -144,7 +144,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) return node_possible_map; } -#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY]) +#define cpuset_current_mems_allowed (node_states[N_MEMORY]) static inline void cpuset_init_current_mems_allowed(void) {} static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 31e8041274f..f74856e17e4 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -34,6 +34,7 @@ struct vm_area_struct; #define ___GFP_NO_KSWAPD 0x400000u #define ___GFP_OTHER_NODE 0x800000u #define ___GFP_WRITE 0x1000000u +/* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* * GFP bitmasks.. diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1af47755245..092dc5305a3 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -39,6 +39,7 @@ enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, + TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, #ifdef CONFIG_DEBUG_VM TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, #endif @@ -78,6 +79,9 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \ (__vma)->vm_flags & VM_HUGEPAGE)) +#define transparent_hugepage_use_zero_page() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) #ifdef CONFIG_DEBUG_VM #define transparent_hugepage_debug_cow() \ (transparent_hugepage_flags & \ @@ -95,12 +99,14 @@ extern int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *pte, pmd_t *pmd, unsigned int flags); extern int split_huge_page(struct page *page); -extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); -#define split_huge_page_pmd(__mm, __pmd) \ +extern void __split_huge_page_pmd(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmd); +#define split_huge_page_pmd(__vma, __address, __pmd) \ do { \ pmd_t *____pmd = (__pmd); \ if (unlikely(pmd_trans_huge(*____pmd))) \ - __split_huge_page_pmd(__mm, ____pmd); \ + __split_huge_page_pmd(__vma, __address, \ + ____pmd); \ } while (0) #define wait_split_huge_page(__anon_vma, __pmd) \ do { \ @@ -110,6 +116,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); BUG_ON(pmd_trans_splitting(*____pmd) || \ pmd_trans_huge(*____pmd)); \ } while (0) +extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, + pmd_t *pmd); #if HPAGE_PMD_ORDER > MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif @@ -177,10 +185,12 @@ static inline int split_huge_page(struct page *page) { return 0; } -#define split_huge_page_pmd(__mm, __pmd) \ +#define split_huge_page_pmd(__vma, __address, __pmd) \ do { } while (0) #define wait_split_huge_page(__anon_vma, __pmd) \ do { } while (0) +#define split_huge_page_pmd_mm(__mm, __address, __pmd) \ + do { } while (0) #define compound_trans_head(page) compound_head(page) static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 11ddc7ffeba..e98a74c0c9c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -181,7 +181,14 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, unsigned long *total_scanned); -void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); +void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); +static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, + enum vm_event_item idx) +{ + if (mem_cgroup_disabled()) + return; + __mem_cgroup_count_vm_event(mm, idx); +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE void mem_cgroup_split_huge_fixup(struct page *head); #endif diff --git a/include/linux/memory.h b/include/linux/memory.h index a09216d0dcc..45e93b46887 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -54,6 +54,7 @@ struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; + int status_change_nid_high; int status_change_nid; }; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0c0b1d608a6..cd55dad56aa 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -460,17 +460,44 @@ struct zone { unsigned long zone_start_pfn; /* - * zone_start_pfn, spanned_pages and present_pages are all - * protected by span_seqlock. It is a seqlock because it has - * to be read outside of zone->lock, and it is done in the main - * allocator path. But, it is written quite infrequently. + * spanned_pages is the total pages spanned by the zone, including + * holes, which is calculated as: + * spanned_pages = zone_end_pfn - zone_start_pfn; * - * The lock is declared along with zone->lock because it is + * present_pages is physical pages existing within the zone, which + * is calculated as: + * present_pages = spanned_pages - absent_pages(pags in holes); + * + * managed_pages is present pages managed by the buddy system, which + * is calculated as (reserved_pages includes pages allocated by the + * bootmem allocator): + * managed_pages = present_pages - reserved_pages; + * + * So present_pages may be used by memory hotplug or memory power + * management logic to figure out unmanaged pages by checking + * (present_pages - managed_pages). And managed_pages should be used + * by page allocator and vm scanner to calculate all kinds of watermarks + * and thresholds. + * + * Locking rules: + * + * zone_start_pfn and spanned_pages are protected by span_seqlock. + * It is a seqlock because it has to be read outside of zone->lock, + * and it is done in the main allocator path. But, it is written + * quite infrequently. + * + * The span_seq lock is declared along with zone->lock because it is * frequently read in proximity to zone->lock. It's good to * give them a chance of being in the same cacheline. + * + * Write access to present_pages and managed_pages at runtime should + * be protected by lock_memory_hotplug()/unlock_memory_hotplug(). + * Any reader who can't tolerant drift of present_pages and + * managed_pages should hold memory hotplug lock to get a stable value. */ - unsigned long spanned_pages; /* total size, including holes */ - unsigned long present_pages; /* amount of memory (excluding holes) */ + unsigned long spanned_pages; + unsigned long present_pages; + unsigned long managed_pages; /* * rarely used fields: diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 7afc36334d5..4e2cbfa640b 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -380,6 +380,11 @@ enum node_states { #else N_HIGH_MEMORY = N_NORMAL_MEMORY, #endif +#ifdef CONFIG_MOVABLE_NODE + N_MEMORY, /* The node has memory(regular, high, movable) */ +#else + N_MEMORY = N_HIGH_MEMORY, +#endif N_CPU, /* The node has one or more cpus */ NR_NODE_STATES }; diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 7d7fbe2ef78..6f54e40fa21 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -74,14 +74,9 @@ ssize_t res_counter_read(struct res_counter *counter, int member, const char __user *buf, size_t nbytes, loff_t *pos, int (*read_strategy)(unsigned long long val, char *s)); -typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val); - int res_counter_memparse_write_strategy(const char *buf, unsigned long long *res); -int res_counter_write(struct res_counter *counter, int member, - const char *buffer, write_strategy_fn write_strategy); - /* * the field descriptors. one for each member of res_counter */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 3d311459437..fe786f07d2b 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -58,6 +58,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_COLLAPSE_ALLOC, THP_COLLAPSE_ALLOC_FAILED, THP_SPLIT, + THP_ZERO_PAGE_ALLOC, + THP_ZERO_PAGE_ALLOC_FAILED, #endif NR_VM_EVENT_ITEMS }; |