-rw-r--r--   Documentation/kernel-parameters.txt |  6
-rw-r--r--   Documentation/vm/slub.txt           |  2
-rw-r--r--   mm/slab.c                           | 39
-rw-r--r--   mm/slub.c                           | 77
4 files changed, 82 insertions, 42 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c92b1532f05..a8d389d7240 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2395,6 +2395,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	slram=		[HW,MTD]
 
+	slab_max_order=	[MM, SLAB]
+			Determines the maximum allowed order for slabs.
+			A high setting may cause OOMs due to memory
+			fragmentation. Defaults to 1 for systems with
+			more than 32MB of RAM, 0 otherwise.
+
 	slub_debug[=options[,slabs]]	[MM, SLUB]
 			Enabling slub_debug allows one to determine
 			the culprit if slab objects become corrupted. Enabling
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index f464f47bc60..2acdda9601b 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -117,7 +117,7 @@ can be influenced by kernel parameters:
 
 slub_min_objects=x		(default 4)
 slub_min_order=x		(default 0)
-slub_max_order=x		(default 1)
+slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))
 
 slub_min_objects allows to specify how many objects must at least fit
 into one slab in order for the allocation order to be acceptable.
diff --git a/mm/slab.c b/mm/slab.c
index 2acfa0d9094..f0bd7857ab3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -481,11 +481,13 @@ EXPORT_SYMBOL(slab_buffer_size);
 #endif
 
 /*
- * Do not go above this order unless 0 objects fit into the slab.
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
  */
-#define	BREAK_GFP_ORDER_HI	1
-#define	BREAK_GFP_ORDER_LO	0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#define	SLAB_MAX_ORDER_HI	1
+#define	SLAB_MAX_ORDER_LO	0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
 
 /*
  * Functions for storing/retrieving the cachep and or slab from the page
@@ -854,6 +856,17 @@ static int __init noaliencache_setup(char *s)
 }
 __setup("noaliencache", noaliencache_setup);
 
+static int __init slab_max_order_setup(char *str)
+{
+	get_option(&str, &slab_max_order);
+	slab_max_order = slab_max_order < 0 ? 0 :
+				min(slab_max_order, MAX_ORDER - 1);
+	slab_max_order_set = true;
+
+	return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
+
 #ifdef CONFIG_NUMA
 /*
  * Special reaping functions for NUMA systems called from cache_reap().
@@ -1502,10 +1515,11 @@ void __init kmem_cache_init(void)
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
-	 * page orders on machines with more than 32MB of memory.
+	 * page orders on machines with more than 32MB of memory if
+	 * not overridden on the command line.
 	 */
-	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
-		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+	if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+		slab_max_order = SLAB_MAX_ORDER_HI;
 
 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
@@ -1932,8 +1946,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Print header */
 			if (lines == 0) {
 				printk(KERN_ERR
-					"Slab corruption: %s start=%p, len=%d\n",
-					cachep->name, realobj, size);
+					"Slab corruption (%s): %s start=%p, len=%d\n",
+					print_tainted(), cachep->name, realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
@@ -2117,7 +2131,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 		 * Large number of objects is good, but very large slabs are
 		 * currently bad for the gfp()s.
 		 */
-		if (gfporder >= slab_break_gfp_order)
+		if (gfporder >= slab_max_order)
 			break;
 
 		/*
@@ -3042,8 +3056,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 	if (entries != cachep->num - slabp->inuse) {
 bad:
 		printk(KERN_ERR "slab: Internal list corruption detected in "
-				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-			cachep->name, cachep->num, slabp, slabp->inuse);
+				"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
+			cachep->name, cachep->num, slabp, slabp->inuse,
+			print_tainted());
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
 			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
 			1);
diff --git a/mm/slub.c b/mm/slub.c
index d99acbf14e0..5d37b5e4414 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 		}
 
 		if (l != m) {
-			if (l == M_PARTIAL)
+			if (l == M_PARTIAL) {
 				remove_partial(n, page);
-			else
+				stat(s, FREE_REMOVE_PARTIAL);
+			} else {
 				add_partial(n, page,
 					DEACTIVATE_TO_TAIL);
+				stat(s, FREE_ADD_PARTIAL);
+			}
 
 			l = m;
 		}
@@ -2124,6 +2127,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 }
 
 /*
+ * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
+ * or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
+/*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
@@ -2144,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2166,31 +2198,14 @@ redo:
 		goto new_slab;
 	}
 
-	stat(s, ALLOC_SLOWPATH);
-
-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
 
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
+	stat(s, ALLOC_SLOWPATH);
 
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	object = get_freelist(s, c->page);
 
 	if (!object) {
 		c->page = NULL;
@@ -3028,7 +3043,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
@@ -4637,6 +4654,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;
 
 	s->cpu_partial = objects;
 	flush_all(s);
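
Note on the new slab_max_order= handling (not part of the patch): slab_max_order_setup() clamps whatever the user passes on the command line to the range [0, MAX_ORDER - 1]. The following standalone userspace sketch reproduces just that clamping rule; clamp_slab_max_order() is a hypothetical helper, and MAX_ORDER is assumed to be 11 as on a typical x86 configuration.

#include <stdio.h>
#include <stdlib.h>

/*
 * Userspace sketch of the clamping applied by slab_max_order_setup()
 * in the patch above.  MAX_ORDER = 11 is an assumption for illustration;
 * the kernel uses its own MAX_ORDER constant.
 */
#define MAX_ORDER 11

static int clamp_slab_max_order(int requested)
{
	if (requested < 0)
		return 0;
	if (requested > MAX_ORDER - 1)
		return MAX_ORDER - 1;
	return requested;
}

int main(int argc, char **argv)
{
	int req = argc > 1 ? atoi(argv[1]) : 1;

	/* e.g. a request of 13 would be clamped to 10 on this configuration */
	printf("slab_max_order=%d -> %d\n", req, clamp_slab_max_order(req));
	return 0;
}

With the patch applied, booting such a configuration with an oversized value (say slab_max_order=13) would therefore behave as if slab_max_order=10 had been given, and a negative value falls back to 0.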
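For readers unfamiliar with the pattern behind get_freelist(): it detaches the entire page freelist in one atomic step and retries if the page changed underneath. Below is a minimal userspace sketch of the same detach-and-retry idea, reduced to a single C11 atomic head pointer; take_freelist() and freelist_head are hypothetical names, and the kernel's folding of the inuse/frozen counters into cmpxchg_double_slab() is deliberately omitted.

#include <stdatomic.h>
#include <stdio.h>

struct object {
	struct object *next;
};

static _Atomic(struct object *) freelist_head;

/* Atomically take every object currently on the freelist. */
static struct object *take_freelist(void)
{
	struct object *old = atomic_load(&freelist_head);

	/* Retry until we swap the head we observed for NULL. */
	while (!atomic_compare_exchange_weak(&freelist_head, &old, NULL))
		;	/* 'old' is reloaded by the failed CAS */

	return old;
}

int main(void)
{
	struct object a = { NULL }, b = { &a };
	struct object *list;
	int n = 0;

	atomic_store(&freelist_head, &b);

	for (list = take_freelist(); list; list = list->next)
		n++;

	printf("took %d objects, head is now %p\n",
	       n, (void *)atomic_load(&freelist_head));
	return 0;
}

As in the patch, the retry loop rebuilds its view of the list on every failed compare-and-swap, so a concurrent producer or consumer never leaves the taker with a stale head.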