From d5cff635290aec9ad7e6ee546aa4fae895361cbb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 11 Jun 2009 13:22:40 +0100 Subject: kmemleak: Add the slab memory allocation/freeing hooks This patch adds the callbacks to kmemleak_(alloc|free) functions from the slab allocator. The patch also adds the SLAB_NOLEAKTRACE flag to avoid recursive calls to kmemleak when it allocates its own data structures. Signed-off-by: Catalin Marinas Reviewed-by: Pekka Enberg --- include/linux/slab.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/slab.h') diff --git a/include/linux/slab.h b/include/linux/slab.h index 24c5602bee9..48803064ced 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -62,6 +62,8 @@ # define SLAB_DEBUG_OBJECTS 0x00000000UL #endif +#define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ -- cgit v1.2.3-70-g09d2 From 7e85ee0c1d15ca5f8bff0f514f158eba1742dd87 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 12 Jun 2009 14:03:06 +0300 Subject: slab,slub: don't enable interrupts during early boot As explained by Benjamin Herrenschmidt: Oh and btw, your patch alone doesn't fix powerpc, because it's missing a whole bunch of GFP_KERNEL's in the arch code... You would have to grep the entire kernel for things that check slab_is_available() and even then you'll be missing some. For example, slab_is_available() didn't always exist, and so in the early days on powerpc, we used a mem_init_done global that is set form mem_init() (not perfect but works in practice). And we still have code using that to do the test. Therefore, mask out __GFP_WAIT, __GFP_IO, and __GFP_FS in the slab allocators in early boot code to avoid enabling interrupts. Signed-off-by: Pekka Enberg --- include/linux/gfp.h | 3 +++ include/linux/slab.h | 2 ++ include/linux/slob_def.h | 5 +++++ include/linux/slub_def.h | 2 ++ init/main.c | 1 + mm/slab.c | 18 ++++++++++++++++++ mm/slub.c | 16 ++++++++++++++++ 7 files changed, 47 insertions(+) (limited to 'include/linux/slab.h') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0bbc15f5453..3760e7c5de0 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -85,6 +85,9 @@ struct vm_area_struct; __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_NOMEMALLOC) +/* Control slab gfp mask during early boot */ +#define SLAB_GFP_BOOT_MASK __GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS) + /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) diff --git a/include/linux/slab.h b/include/linux/slab.h index 48803064ced..219b8fb4651 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -319,4 +319,6 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index 0ec00b39d00..bb5368df4be 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -34,4 +34,9 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } +static inline void kmem_cache_init_late(void) +{ + /* Nothing to do */ +} + #endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index be5d40c43bd..4dcbc2c7149 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -302,4 +302,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) } #endif +void __init kmem_cache_init_late(void); + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/init/main.c b/init/main.c index b3e8f14c568..f6204f712e7 100644 --- a/init/main.c +++ b/init/main.c @@ -640,6 +640,7 @@ asmlinkage void __init start_kernel(void) "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); + kmem_cache_init_late(); /* * HACK ALERT! This is early. We're enabling the console before diff --git a/mm/slab.c b/mm/slab.c index cd76964b53b..453efcb1c98 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -303,6 +303,12 @@ struct kmem_list3 { int free_touched; /* updated without locking */ }; +/* + * The slab allocator is initialized with interrupts disabled. Therefore, make + * sure early boot allocations don't accidentally enable interrupts. + */ +static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; + /* * Need this for bootstrapping a per node allocator. */ @@ -1654,6 +1660,14 @@ void __init kmem_cache_init(void) */ } +void __init kmem_cache_init_late(void) +{ + /* + * Interrupts are enabled now so all GFP allocations are safe. + */ + slab_gfp_mask = __GFP_BITS_MASK; +} + static int __init cpucache_init(void) { int cpu; @@ -3354,6 +3368,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, unsigned long save_flags; void *ptr; + flags &= slab_gfp_mask; + lockdep_trace_alloc(flags); if (slab_should_failslab(cachep, flags)) @@ -3434,6 +3450,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) unsigned long save_flags; void *objp; + flags &= slab_gfp_mask; + lockdep_trace_alloc(flags); if (slab_should_failslab(cachep, flags)) diff --git a/mm/slub.c b/mm/slub.c index 3964d3ce4c1..30354bfeb43 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -178,6 +178,12 @@ static enum { SYSFS /* Sysfs up */ } slab_state = DOWN; +/* + * The slab allocator is initialized with interrupts disabled. Therefore, make + * sure early boot allocations don't accidentally enable interrupts. + */ +static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; + /* A list of all slab caches on the system */ static DECLARE_RWSEM(slub_lock); static LIST_HEAD(slab_caches); @@ -1595,6 +1601,8 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, unsigned long flags; unsigned int objsize; + gfpflags &= slab_gfp_mask; + lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); @@ -3104,6 +3112,14 @@ void __init kmem_cache_init(void) nr_cpu_ids, nr_node_ids); } +void __init kmem_cache_init_late(void) +{ + /* + * Interrupts are enabled now so all GFP allocations are safe. + */ + slab_gfp_mask = __GFP_BITS_MASK; +} + /* * Find a mergeable slab cache */ -- cgit v1.2.3-70-g09d2 From 2dff440525f8faba8836e9f05297b76f23b4af30 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sat, 31 May 2008 15:56:17 +0200 Subject: kmemcheck: add mm functions With kmemcheck enabled, the slab allocator needs to do this: 1. Tell kmemcheck to allocate the shadow memory which stores the status of each byte in the allocation proper, e.g. whether it is initialized or uninitialized. 2. Tell kmemcheck which parts of memory that should be marked uninitialized. There are actually a few more states, such as "not yet allocated" and "recently freed". If a slab cache is set up using the SLAB_NOTRACK flag, it will never return memory that can take page faults because of kmemcheck. If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still request memory with the __GFP_NOTRACK flag. This does not prevent the page faults from occuring, however, but marks the object in question as being initialized so that no warnings will ever be produced for this object. In addition to (and in contrast to) __GFP_NOTRACK, the __GFP_NOTRACK_FALSE_POSITIVE flag indicates that the allocation should not be tracked _because_ it would produce a false positive. Their values are identical, but need not be so in the future (for example, we could now enable/disable false positives with a config option). Parts of this patch were contributed by Pekka Enberg but merged for atomicity. Signed-off-by: Vegard Nossum Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/kernel/process.c | 2 +- include/linux/gfp.h | 9 +++- include/linux/kmemcheck.h | 47 +++++++++++++++++++++ include/linux/slab.h | 7 ++++ kernel/fork.c | 14 +++---- mm/Makefile | 1 + mm/kmemcheck.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 mm/kmemcheck.c (limited to 'include/linux/slab.h') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3bb2be1649b..994dd6a4a2a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -63,7 +63,7 @@ void arch_task_cache_init(void) task_xstate_cachep = kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), - SLAB_PANIC, NULL); + SLAB_PANIC | SLAB_NOTRACK, NULL); } /* diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0bbc15f5453..daeaa8fe1bb 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -51,8 +51,15 @@ struct vm_area_struct; #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ #define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ +#define __GFP_NOTRACK ((__force gfp_t)0x200000u) /* Don't track with kmemcheck */ -#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */ +/* + * This may seem redundant, but it's a way of annotating false positives vs. + * allocations that simply cannot be supported (e.g. page tables). + */ +#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) + +#define __GFP_BITS_SHIFT 22 /* Room for 22 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 39480c91b2f..5b65f4ebead 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -7,11 +7,58 @@ #ifdef CONFIG_KMEMCHECK extern int kmemcheck_enabled; +/* The slab-related functions. */ +void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, + struct page *page, int order); +void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order); +void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, + size_t size); +void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); + +void kmemcheck_show_pages(struct page *p, unsigned int n); +void kmemcheck_hide_pages(struct page *p, unsigned int n); + +bool kmemcheck_page_is_tracked(struct page *p); + +void kmemcheck_mark_unallocated(void *address, unsigned int n); +void kmemcheck_mark_uninitialized(void *address, unsigned int n); +void kmemcheck_mark_initialized(void *address, unsigned int n); +void kmemcheck_mark_freed(void *address, unsigned int n); + +void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); +void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); + int kmemcheck_show_addr(unsigned long address); int kmemcheck_hide_addr(unsigned long address); #else #define kmemcheck_enabled 0 +static inline void +kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, + struct page *page, int order) +{ +} + +static inline void +kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order) +{ +} + +static inline void +kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, + size_t size) +{ +} + +static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, + size_t size) +{ +} + +static inline bool kmemcheck_page_is_tracked(struct page *p) +{ + return false; +} #endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 48803064ced..e339fcf17cd 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -64,6 +64,13 @@ #define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ +/* Don't track use of uninitialized memory */ +#ifdef CONFIG_KMEMCHECK +# define SLAB_NOTRACK 0x01000000UL +#else +# define SLAB_NOTRACK 0x00000000UL +#endif + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/kernel/fork.c b/kernel/fork.c index 4430eb1376f..be022c200da 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -178,7 +178,7 @@ void __init fork_init(unsigned long mempages) /* create a slab on which task_structs can be allocated */ task_struct_cachep = kmem_cache_create("task_struct", sizeof(struct task_struct), - ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); + ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif /* do the arch specific task caches init */ @@ -1470,20 +1470,20 @@ void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, - sighand_ctor); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| + SLAB_NOTRACK, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); files_cachep = kmem_cache_create("files_cache", sizeof(struct files_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); mmap_init(); } diff --git a/mm/Makefile b/mm/Makefile index e89acb090b4..c379ce08354 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o +obj-$(CONFIG_KMEMCHECK) += kmemcheck.o obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c new file mode 100644 index 00000000000..eaa41b80261 --- /dev/null +++ b/mm/kmemcheck.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include + +void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node, + struct page *page, int order) +{ + struct page *shadow; + int pages; + int i; + + pages = 1 << order; + + /* + * With kmemcheck enabled, we need to allocate a memory area for the + * shadow bits as well. + */ + shadow = alloc_pages_node(node, flags, order); + if (!shadow) { + if (printk_ratelimit()) + printk(KERN_ERR "kmemcheck: failed to allocate " + "shadow bitmap\n"); + return; + } + + for(i = 0; i < pages; ++i) + page[i].shadow = page_address(&shadow[i]); + + /* + * Mark it as non-present for the MMU so that our accesses to + * this memory will trigger a page fault and let us analyze + * the memory accesses. + */ + kmemcheck_hide_pages(page, pages); + + /* + * Objects from caches that have a constructor don't get + * cleared when they're allocated, so we need to do it here. + */ + if (s->ctor) + kmemcheck_mark_uninitialized_pages(page, pages); + else + kmemcheck_mark_unallocated_pages(page, pages); +} + +void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order) +{ + struct page *shadow; + int pages; + int i; + + pages = 1 << order; + + kmemcheck_show_pages(page, pages); + + shadow = virt_to_page(page[0].shadow); + + for(i = 0; i < pages; ++i) + page[i].shadow = NULL; + + __free_pages(shadow, order); +} + +void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, + size_t size) +{ + /* + * Has already been memset(), which initializes the shadow for us + * as well. + */ + if (gfpflags & __GFP_ZERO) + return; + + /* No need to initialize the shadow of a non-tracked slab. */ + if (s->flags & SLAB_NOTRACK) + return; + + if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { + /* + * Allow notracked objects to be allocated from + * tracked caches. Note however that these objects + * will still get page faults on access, they just + * won't ever be flagged as uninitialized. If page + * faults are not acceptable, the slab cache itself + * should be marked NOTRACK. + */ + kmemcheck_mark_initialized(object, size); + } else if (!s->ctor) { + /* + * New objects should be marked uninitialized before + * they're returned to the called. + */ + kmemcheck_mark_uninitialized(object, size); + } +} + +void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) +{ + /* TODO: RCU freeing is unsupported for now; hide false positives. */ + if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) + kmemcheck_mark_freed(object, size); +} -- cgit v1.2.3-70-g09d2