author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2010-05-07 11:29:25 +1000
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2010-05-07 11:29:25 +1000
commit | 1ed31d6db90d51010545921e59d369d2f92b7ac2 (patch)
tree | 358a0b346bc8135cd5e53700eb44308b1a7c8c5b /mm
parent | ceba1abcb00b0ef0b1efcd715285f6e05523edef (diff)
parent | 722154e4cacf015161efe60009ae9be23d492296 (diff)
Merge commit 'origin/master' into next
Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 37
-rw-r--r-- | mm/bootmem.c | 17
-rw-r--r-- | mm/hugetlb.c | 5
-rw-r--r-- | mm/ksm.c | 12
-rw-r--r-- | mm/memcontrol.c | 22
-rw-r--r-- | mm/memory.c | 3
-rw-r--r-- | mm/mmap.c | 113
-rw-r--r-- | mm/pagewalk.c | 47
-rw-r--r-- | mm/readahead.c | 2
-rw-r--r-- | mm/rmap.c | 33
-rw-r--r-- | mm/slab.c | 13
-rw-r--r-- | mm/slub.c | 5
-rw-r--r-- | mm/util.c | 21
-rw-r--r-- | mm/vmscan.c | 23
14 files changed, 244 insertions, 109 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca034770..707d0dc6da0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -11,6 +11,8 @@
 #include <linux/writeback.h>
 #include <linux/device.h>
 
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
 }
@@ -25,6 +27,11 @@ struct backing_dev_info default_backing_dev_info = {
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
+struct backing_dev_info noop_backing_dev_info = {
+	.name	= "noop",
+};
+EXPORT_SYMBOL_GPL(noop_backing_dev_info);
+
 static struct class *bdi_class;
 
 /*
@@ -227,6 +234,9 @@ static struct device_attribute bdi_dev_attrs[] = {
 static __init int bdi_class_init(void)
 {
 	bdi_class = class_create(THIS_MODULE, "bdi");
+	if (IS_ERR(bdi_class))
+		return PTR_ERR(bdi_class);
+
 	bdi_class->dev_attrs = bdi_dev_attrs;
 	bdi_debug_init();
 	return 0;
@@ -712,6 +722,33 @@ void bdi_destroy(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_destroy);
 
+/*
+ * For use from filesystems to quickly init and register a bdi associated
+ * with dirty writeback
+ */
+int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
+			   unsigned int cap)
+{
+	char tmp[32];
+	int err;
+
+	bdi->name = name;
+	bdi->capabilities = cap;
+	err = bdi_init(bdi);
+	if (err)
+		return err;
+
+	sprintf(tmp, "%.28s%s", name, "-%d");
+	err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
+	if (err) {
+		bdi_destroy(bdi);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(bdi_setup_and_register);
+
 static wait_queue_head_t congestion_wqh[2] = {
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
diff --git a/mm/bootmem.c b/mm/bootmem.c
index eff22422057..58c66cc5056 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -304,9 +304,22 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 
 unsigned long __init free_all_bootmem(void)
 {
 #ifdef CONFIG_NO_BOOTMEM
-	return free_all_memory_core_early(NODE_DATA(0)->node_id);
+	/*
+	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+	 *  because in some case like Node0 doesnt have RAM installed
+	 *  low ram will be on Node1
+	 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
+	 *  will be used instead of only Node0 related
+	 */
+	return free_all_memory_core_early(MAX_NUMNODES);
 #else
-	return free_all_bootmem_core(NODE_DATA(0)->bdata);
+	unsigned long total_pages = 0;
+	bootmem_data_t *bdata;
+
+	list_for_each_entry(bdata, &bdata_list, list)
+		total_pages += free_all_bootmem_core(bdata);
+
+	return total_pages;
 #endif
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6034dc9e979..ffbdfc86aed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -546,6 +546,7 @@ static void free_huge_page(struct page *page)
 
 	mapping = (struct address_space *) page_private(page);
 	set_page_private(page, 0);
+	page->mapping = NULL;
 	BUG_ON(page_count(page));
 	INIT_LIST_HEAD(&page->lru);
 
@@ -2447,8 +2448,10 @@ retry:
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
-		} else
+		} else {
 			lock_page(page);
+			page->mapping = HUGETLB_POISON;
+		}
 	}
 
 	/*
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -365,7 +365,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 	do {
 		cond_resched();
 		page = follow_page(vma, addr, FOLL_GET);
-		if (!page)
+		if (IS_ERR_OR_NULL(page))
 			break;
 		if (PageKsm(page))
 			ret = handle_mm_fault(vma->vm_mm, vma, addr,
@@ -447,7 +447,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 		goto out;
 
 	page = follow_page(vma, addr, FOLL_GET);
-	if (!page)
+	if (IS_ERR_OR_NULL(page))
 		goto out;
 	if (PageAnon(page)) {
 		flush_anon_page(vma, page, addr);
@@ -1086,7 +1086,7 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
 		cond_resched();
 		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
 		tree_page = get_mergeable_page(tree_rmap_item);
-		if (!tree_page)
+		if (IS_ERR_OR_NULL(tree_page))
 			return NULL;
 
 		/*
@@ -1294,7 +1294,7 @@ next_mm:
 		if (ksm_test_exit(mm))
 			break;
 		*page = follow_page(vma, ksm_scan.address, FOLL_GET);
-		if (*page && PageAnon(*page)) {
+		if (!IS_ERR_OR_NULL(*page) && PageAnon(*page)) {
 			flush_anon_page(vma, *page, ksm_scan.address);
 			flush_dcache_page(*page);
 			rmap_item = get_next_rmap_item(slot,
@@ -1308,7 +1308,7 @@ next_mm:
 			up_read(&mm->mmap_sem);
 			return rmap_item;
 		}
-		if (*page)
+		if (!IS_ERR_OR_NULL(*page))
 			put_page(*page);
 		ksm_scan.address += PAGE_SIZE;
 		cond_resched();
@@ -1367,7 +1367,7 @@ next_mm:
 static void ksm_do_scan(unsigned int scan_npages)
 {
 	struct rmap_item *rmap_item;
-	struct page *page;
+	struct page *uninitialized_var(page);
 
 	while (scan_npages--) {
 		cond_resched();
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9ed760dc744..6c755de385f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1359,16 +1359,19 @@ void mem_cgroup_update_file_mapped(struct page *page, int val)
 
 	lock_page_cgroup(pc);
 	mem = pc->mem_cgroup;
-	if (!mem)
-		goto done;
-
-	if (!PageCgroupUsed(pc))
+	if (!mem || !PageCgroupUsed(pc))
 		goto done;
 
 	/*
 	 * Preemption is already disabled. We can use __this_cpu_xxx
 	 */
-	__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], val);
+	if (val > 0) {
+		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+		SetPageCgroupFileMapped(pc);
+	} else {
+		__this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+		ClearPageCgroupFileMapped(pc);
+	}
 
 done:
 	unlock_page_cgroup(pc);
@@ -1801,16 +1804,13 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
 	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
 {
-	struct page *page;
-
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!PageCgroupLocked(pc));
 	VM_BUG_ON(!PageCgroupUsed(pc));
 	VM_BUG_ON(pc->mem_cgroup != from);
 
-	page = pc->page;
-	if (page_mapped(page) && !PageAnon(page)) {
+	if (PageCgroupFileMapped(pc)) {
 		/* Update mapped_file data for mem_cgroup */
 		preempt_disable();
 		__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
@@ -2429,11 +2429,11 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 	}
 	unlock_page_cgroup(pc);
 
+	*ptr = mem;
 	if (mem) {
-		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
 		css_put(&mem->css);
 	}
-	*ptr = mem;
 	return ret;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 1d2ea39260e..833952d8b74 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -125,13 +125,12 @@ core_initcall(init_zero_pfn);
 
 #if defined(SPLIT_RSS_COUNTING)
 
-void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
 {
 	int i;
 
 	for (i = 0; i < NR_MM_COUNTERS; i++) {
 		if (task->rss_stat.count[i]) {
-			BUG_ON(!mm);
 			add_mm_counter(mm, i, task->rss_stat.count[i]);
 			task->rss_stat.count[i] = 0;
 		}
diff --git a/mm/mmap.c b/mm/mmap.c
index 75557c639ad..456ec6f2788 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
 	struct file *file = vma->vm_file;
-	struct anon_vma *anon_vma = NULL;
 	long adjust_next = 0;
 	int remove_next = 0;
 
 	if (next && !insert) {
+		struct vm_area_struct *exporter = NULL;
+
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 */
again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
@@ -527,7 +528,7 @@ again: remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 5 shifting the boundary up.
 			 */
 			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end < vma->vm_end) {
 			/*
@@ -536,28 +537,19 @@ again: remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 4 shifting the boundary down.
 			 */
 			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-			anon_vma = next->anon_vma;
+			exporter = vma;
 			importer = next;
 		}
-	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
 		/*
 		 * Easily overlooked: when mprotect shifts the boundary,
 		 * make sure the expanding vma has anon_vma set if the
 		 * shrinking vma had, to cover any anon pages imported.
 		 */
-		if (importer && !importer->anon_vma) {
-			/* Block reverse map lookups until things are set up. */
-			if (anon_vma_clone(importer, vma)) {
+		if (exporter && exporter->anon_vma && !importer->anon_vma) {
+			if (anon_vma_clone(importer, exporter))
 				return -ENOMEM;
-			}
-			importer->anon_vma = anon_vma;
+			importer->anon_vma = exporter->anon_vma;
 		}
 	}
 
@@ -825,6 +817,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
+ * Rough compatbility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	return a->vm_end == b->vm_start &&
+		mpol_equal(vma_policy(a), vma_policy(b)) &&
+		a->vm_file == b->vm_file &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	if (anon_vma_compatible(a, b)) {
+		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+		if (anon_vma && list_is_singular(&old->anon_vma_chain))
+			return anon_vma;
+	}
+	return NULL;
+}
+
+/*
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
  * to allocate a new anon_vma.  It checks because a repetitive
@@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
-	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	/*
-	 * Since only mprotect tries to remerge vmas, match flags
-	 * which might be mprotected into each other later on.
-	 * Neither mlock nor madvise tries to remerge at present,
-	 * so leave their flags as obstructing a merge.
-	 */
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && vma->vm_end == near->vm_start &&
-			mpol_equal(vma_policy(vma), vma_policy(near)) &&
-			can_vma_merge_before(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff +
-				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, vma, near);
+	if (anon_vma)
+		return anon_vma;
try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
@@ -868,14 +903,9 @@ try_prev:
 	if (!near)
 		goto none;
 
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && near->vm_end == vma->vm_start &&
-			mpol_equal(vma_policy(near), vma_policy(vma)) &&
-			can_vma_merge_after(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, near, vma);
+	if (anon_vma)
+		return anon_vma;
none:
 	/*
 	 * There's no absolute need to look only at touching neighbours:
@@ -1947,7 +1977,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		return 0;
 
 	/* Clean everything up if vma_adjust failed. */
-	new->vm_ops->close(new);
+	if (new->vm_ops && new->vm_ops->close)
+		new->vm_ops->close(new);
 	if (new->vm_file) {
 		if (vma->vm_flags & VM_EXECUTABLE)
 			removed_exe_file_vma(mm);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 7b47a57b664..8b1a2ce21ee 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -80,6 +80,37 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
+				       unsigned long end)
+{
+	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
+	return boundary < end ? boundary : end;
+}
+
+static int walk_hugetlb_range(struct vm_area_struct *vma,
+			      unsigned long addr, unsigned long end,
+			      struct mm_walk *walk)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long next;
+	unsigned long hmask = huge_page_mask(h);
+	pte_t *pte;
+	int err = 0;
+
+	do {
+		next = hugetlb_entry_end(h, addr, end);
+		pte = huge_pte_offset(walk->mm, addr & hmask);
+		if (pte && walk->hugetlb_entry)
+			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+		if (err)
+			return err;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif
+
 /**
  * walk_page_range - walk a memory map's page tables with a callback
  * @mm: memory map to walk
@@ -128,20 +159,16 @@ int walk_page_range(unsigned long addr, unsigned long end,
 		vma = find_vma(walk->mm, addr);
 #ifdef CONFIG_HUGETLB_PAGE
 		if (vma && is_vm_hugetlb_page(vma)) {
-			pte_t *pte;
-			struct hstate *hs;
-
 			if (vma->vm_end < next)
 				next = vma->vm_end;
-			hs = hstate_vma(vma);
-			pte = huge_pte_offset(walk->mm,
-					      addr & huge_page_mask(hs));
-			if (pte && !huge_pte_none(huge_ptep_get(pte))
-			    && walk->hugetlb_entry)
-				err = walk->hugetlb_entry(pte, addr,
-							  next, walk);
+			/*
+			 * Hugepage is very tightly coupled with vma, so
+			 * walk through hugetlb entries within a given vma.
+			 */
+			err = walk_hugetlb_range(vma, addr, next, walk);
 			if (err)
 				break;
+			pgd = pgd_offset(walk->mm, next);
 			continue;
 		}
 #endif
diff --git a/mm/readahead.c b/mm/readahead.c
index 999b54bb462..dfa9a1a03a1 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -503,7 +503,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
 		return;
 
 	/* be dumb */
-	if (filp->f_mode & FMODE_RANDOM) {
+	if (filp && (filp->f_mode & FMODE_RANDOM)) {
 		force_page_cache_readahead(mapping, filp, offset, req_size);
 		return;
 	}
diff --git a/mm/rmap.c b/mm/rmap.c
index eaa7a09eb72..07fc9475879 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -133,8 +133,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 				goto out_enomem_free_avc;
 			allocated = anon_vma;
 		}
-		spin_lock(&anon_vma->lock);
 
+		spin_lock(&anon_vma->lock);
 		/* page_table_lock to protect against threads */
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
@@ -144,14 +144,15 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 			list_add(&avc->same_vma, &vma->anon_vma_chain);
 			list_add(&avc->same_anon_vma, &anon_vma->head);
 			allocated = NULL;
+			avc = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
-
 		spin_unlock(&anon_vma->lock);
-		if (unlikely(allocated)) {
+
+		if (unlikely(allocated))
 			anon_vma_free(allocated);
+		if (unlikely(avc))
 			anon_vma_chain_free(avc);
-		}
 	}
 
 	return 0;
@@ -182,7 +183,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
 	struct anon_vma_chain *avc, *pavc;
 
-	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		avc = anon_vma_chain_alloc();
 		if (!avc)
 			goto enomem_failure;
@@ -730,13 +731,29 @@ void page_move_anon_rmap(struct page *page,
  * @page:	the page to add the mapping to
  * @vma:	the vm area in which the mapping is added
  * @address:	the user virtual address mapped
+ * @exclusive:	the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
-	struct vm_area_struct *vma, unsigned long address)
+	struct vm_area_struct *vma, unsigned long address, int exclusive)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 
 	BUG_ON(!anon_vma);
+
+	/*
+	 * If the page isn't exclusively mapped into this vma,
+	 * we must use the _oldest_ possible anon_vma for the
+	 * page mapping!
+	 *
+	 * So take the last AVC chain entry in the vma, which is
+	 * the deepest ancestor, and use the anon_vma from that.
+	 */
+	if (!exclusive) {
+		struct anon_vma_chain *avc;
+		avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
+		anon_vma = avc->anon_vma;
+	}
+
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
 	page->index = linear_page_index(vma, address);
@@ -791,7 +808,7 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (first)
-		__page_set_anon_rmap(page, vma, address);
+		__page_set_anon_rmap(page, vma, address, 0);
 	else
 		__page_check_anon_rmap(page, vma, address);
 }
@@ -813,7 +830,7 @@ void page_add_new_anon_rmap(struct page *page,
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
 	__inc_zone_page_state(page, NR_ANON_PAGES);
-	__page_set_anon_rmap(page, vma, address);
+	__page_set_anon_rmap(page, vma, address, 1);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
 	else
diff --git a/mm/slab.c b/mm/slab.c
index a9f325b28be..bac0f4fcc21 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3602,21 +3602,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_notrace);
  */
 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
 {
-	unsigned long addr = (unsigned long)ptr;
-	unsigned long min_addr = PAGE_OFFSET;
-	unsigned long align_mask = BYTES_PER_WORD - 1;
 	unsigned long size = cachep->buffer_size;
 	struct page *page;
 
-	if (unlikely(addr < min_addr))
-		goto out;
-	if (unlikely(addr > (unsigned long)high_memory - size))
-		goto out;
-	if (unlikely(addr & align_mask))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr)))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr + size - 1)))
+	if (unlikely(!kern_ptr_validate(ptr, size)))
 		goto out;
 	page = virt_to_page(ptr);
 	if (unlikely(!PageSlab(page)))
diff --git a/mm/slub.c b/mm/slub.c
index b364844a106..d2a54fe71ea 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2153,7 +2153,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 	int local_node;
 
 	if (slab_state >= UP && (s < kmalloc_caches ||
-			s > kmalloc_caches + KMALLOC_CACHES))
+			s >= kmalloc_caches + KMALLOC_CACHES))
 		local_node = page_to_nid(virt_to_page(s));
 	else
 		local_node = 0;
@@ -2386,6 +2386,9 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 {
 	struct page *page;
 
+	if (!kern_ptr_validate(object, s->size))
+		return 0;
+
 	page = get_object_page(object);
 
 	if (!page || s != page->slab)
diff --git a/mm/util.c b/mm/util.c
index 834db7be240..f5712e8964b 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,6 +186,27 @@ void kzfree(const void *p)
 }
 EXPORT_SYMBOL(kzfree);
 
+int kern_ptr_validate(const void *ptr, unsigned long size)
+{
+	unsigned long addr = (unsigned long)ptr;
+	unsigned long min_addr = PAGE_OFFSET;
+	unsigned long align_mask = sizeof(void *) - 1;
+
+	if (unlikely(addr < min_addr))
+		goto out;
+	if (unlikely(addr > (unsigned long)high_memory - size))
+		goto out;
+	if (unlikely(addr & align_mask))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr)))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr + size - 1)))
+		goto out;
+	return 1;
+out:
+	return 0;
+}
+
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0e5f15bb72..3ff3311447f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1535,13 +1535,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		percent[0] = 0;
-		percent[1] = 100;
-		return;
-	}
-
 	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
 		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
 	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1639,20 +1632,22 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	int noswap = 0;
 
-	get_scan_ratio(zone, sc, percent);
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+		noswap = 1;
+		percent[0] = 0;
+		percent[1] = 100;
+	} else
+		get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
 		unsigned long scan;
 
-		if (percent[file] == 0) {
-			nr[l] = 0;
-			continue;
-		}
-
 		scan = zone_nr_lru_pages(zone, sc, l);
-		if (priority) {
+		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
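Note: the mm/backing-dev.c hunk above adds bdi_setup_and_register() as a shortcut for filesystems that embed a backing_dev_info and want it initialised and registered for writeback in one call. The sketch below is only an illustration of how a caller might use it; the "examplefs" names, the super-block layout, the use of sb->s_bdi and the BDI_CAP_MAP_COPY capability choice are assumptions, not part of this merge.

```c
/* Illustrative sketch only: "examplefs" and its structures are hypothetical. */
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>

struct examplefs_sb_info {
	struct backing_dev_info bdi;
	/* ... other per-mount state ... */
};

static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct examplefs_sb_info *sbi;
	int err;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	/* Init and register the bdi; the name gets a "-%d" sequence suffix. */
	err = bdi_setup_and_register(&sbi->bdi, "examplefs", BDI_CAP_MAP_COPY);
	if (err) {
		kfree(sbi);
		return err;
	}

	sb->s_fs_info = sbi;
	sb->s_bdi = &sbi->bdi;	/* assumes this tree has sb->s_bdi */
	return 0;
}

static void examplefs_kill_sb(struct super_block *sb)
{
	struct examplefs_sb_info *sbi = sb->s_fs_info;

	kill_anon_super(sb);
	bdi_destroy(&sbi->bdi);	/* undoes bdi_setup_and_register() */
	kfree(sbi);
}
```

bdi_destroy() tears down what bdi_setup_and_register() set up, which is also why the helper's own error path calls it after a failed bdi_register().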
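Note: the mm/pagewalk.c hunk changes how the ->hugetlb_entry callback is driven: walk_hugetlb_range() now invokes it once per huge page as hugetlb_entry(pte, hmask, addr, next, walk), passing the huge-page mask explicitly, and it no longer filters out empty entries on the caller's behalf. A minimal sketch of a walker written against the new convention follows; the counting callback, the helper name, and the assumption that struct mm_walk exposes .mm and .private in this tree are illustrative, not taken from this diff.

```c
/* Sketch only: counts populated hugetlb entries in [start, end) of an mm. */
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

static int count_hugetlb_entry(pte_t *pte, unsigned long hmask,
			       unsigned long addr, unsigned long next,
			       struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	/* The walker no longer skips empty slots, so check here. */
	if (!huge_pte_none(huge_ptep_get(pte)))
		(*count)++;
	return 0;	/* a non-zero return would abort walk_page_range() */
}

static unsigned long count_huge_pages(struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
	unsigned long count = 0;
	struct mm_walk walk = {
		.hugetlb_entry	= count_hugetlb_entry,
		.mm		= mm,
		.private	= &count,
	};

	down_read(&mm->mmap_sem);
	walk_page_range(start, end, &walk);
	up_read(&mm->mmap_sem);
	return count;
}
```

walk_page_range() is called here with mmap_sem held for reading, matching the locking the walker itself relies on when it calls find_vma().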