summaryrefslogtreecommitdiffstats
path: root/mm/slab.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slab.c')
-rw-r--r--mm/slab.c252
1 files changed, 150 insertions, 102 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 4cbac24ae2f..acda7e2d66e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -116,8 +116,7 @@
#include <asm/page.h>
/*
- * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
- * SLAB_RED_ZONE & SLAB_POISON.
+ * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
* 0 for faster, smaller code (especially in the critical paths).
*
* STATS - 1 to collect stats for /proc/slabinfo.
@@ -149,10 +148,11 @@
* Usually, the kmalloc caches are cache_line_size() aligned, except when
* DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than BYTES_PER_WORD. ARCH_KMALLOC_MINALIGN allows that.
- * Note that this flag disables some debug features.
+ * alignment larger than the alignment of a 64-bit integer.
+ * ARCH_KMALLOC_MINALIGN allows that.
+ * Note that increasing this value may disable some debug features.
*/
-#define ARCH_KMALLOC_MINALIGN 0
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
@@ -172,15 +172,15 @@
/* Legal flag mask for kmem_cache_create(). */
#if DEBUG
-# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
+# define CREATE_MASK (SLAB_RED_ZONE | \
SLAB_POISON | SLAB_HWCACHE_ALIGN | \
SLAB_CACHE_DMA | \
- SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
+ SLAB_STORE_USER | \
SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#else
# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
- SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
+ SLAB_CACHE_DMA | \
SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#endif
@@ -389,7 +389,6 @@ struct kmem_cache {
unsigned int buffer_size;
u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */
- struct kmem_list3 *nodelists[MAX_NUMNODES];
unsigned int flags; /* constant flags */
unsigned int num; /* # of objs per slab */
@@ -444,6 +443,17 @@ struct kmem_cache {
int obj_offset;
int obj_size;
#endif
+ /*
+ * We put nodelists[] at the end of kmem_cache, because we want to size
+ * this array to nr_node_ids slots instead of MAX_NUMNODES
+ * (see kmem_cache_init())
+ * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
+ * is statically defined, so we reserve the max number of nodes.
+ */
+ struct kmem_list3 *nodelists[MAX_NUMNODES];
+ /*
+ * Do not add fields after nodelists[]
+ */
};
#define CFLGS_OFF_SLAB (0x80000000UL)
@@ -527,19 +537,22 @@ static int obj_size(struct kmem_cache *cachep)
return cachep->obj_size;
}
-static unsigned long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
+static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
- return (unsigned long*) (objp+obj_offset(cachep)-BYTES_PER_WORD);
+ return (unsigned long long*) (objp + obj_offset(cachep) -
+ sizeof(unsigned long long));
}
-static unsigned long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
+static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
if (cachep->flags & SLAB_STORE_USER)
- return (unsigned long *)(objp + cachep->buffer_size -
- 2 * BYTES_PER_WORD);
- return (unsigned long *)(objp + cachep->buffer_size - BYTES_PER_WORD);
+ return (unsigned long long *)(objp + cachep->buffer_size -
+ sizeof(unsigned long long) -
+ BYTES_PER_WORD);
+ return (unsigned long long *) (objp + cachep->buffer_size -
+ sizeof(unsigned long long));
}
static void **dbg_userword(struct kmem_cache *cachep, void *objp)
@@ -552,8 +565,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#define obj_offset(x) 0
#define obj_size(cachep) (cachep->buffer_size)
-#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long *)NULL;})
-#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long *)NULL;})
+#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
+#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
#endif
@@ -592,8 +605,7 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
static inline struct kmem_cache *page_get_cache(struct page *page)
{
- if (unlikely(PageCompound(page)))
- page = (struct page *)page_private(page);
+ page = compound_head(page);
BUG_ON(!PageSlab(page));
return (struct kmem_cache *)page->lru.next;
}
@@ -605,21 +617,19 @@ static inline void page_set_slab(struct page *page, struct slab *slab)
static inline struct slab *page_get_slab(struct page *page)
{
- if (unlikely(PageCompound(page)))
- page = (struct page *)page_private(page);
BUG_ON(!PageSlab(page));
return (struct slab *)page->lru.prev;
}
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
- struct page *page = virt_to_page(obj);
+ struct page *page = virt_to_head_page(obj);
return page_get_cache(page);
}
static inline struct slab *virt_to_slab(const void *obj)
{
- struct page *page = virt_to_page(obj);
+ struct page *page = virt_to_head_page(obj);
return page_get_slab(page);
}
@@ -678,9 +688,6 @@ static struct kmem_cache cache_cache = {
.shared = 1,
.buffer_size = sizeof(struct kmem_cache),
.name = "kmem_cache",
-#if DEBUG
- .obj_size = sizeof(struct kmem_cache),
-#endif
};
#define BAD_ALIEN_MAGIC 0x01020304ul
@@ -1146,7 +1153,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
- if (likely(slabp->nodeid == node) || unlikely(!use_alien_caches))
+ if (likely(slabp->nodeid == node))
return 0;
l3 = cachep->nodelists[node];
@@ -1223,19 +1230,20 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
*/
list_for_each_entry(cachep, &cache_chain, next) {
struct array_cache *nc;
- struct array_cache *shared;
+ struct array_cache *shared = NULL;
struct array_cache **alien = NULL;
nc = alloc_arraycache(node, cachep->limit,
cachep->batchcount);
if (!nc)
goto bad;
- shared = alloc_arraycache(node,
+ if (cachep->shared) {
+ shared = alloc_arraycache(node,
cachep->shared * cachep->batchcount,
0xbaadf00d);
- if (!shared)
- goto bad;
-
+ if (!shared)
+ goto bad;
+ }
if (use_alien_caches) {
alien = alloc_alien_cache(node, cachep->limit);
if (!alien)
@@ -1317,8 +1325,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
shared = l3->shared;
if (shared) {
- free_block(cachep, l3->shared->entry,
- l3->shared->avail, node);
+ free_block(cachep, shared->entry,
+ shared->avail, node);
l3->shared = NULL;
}
@@ -1394,6 +1402,9 @@ void __init kmem_cache_init(void)
int order;
int node;
+ if (num_possible_nodes() == 1)
+ use_alien_caches = 0;
+
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
if (i < MAX_NUMNODES)
@@ -1436,6 +1447,15 @@ void __init kmem_cache_init(void)
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
+ /*
+ * struct kmem_cache size depends on nr_node_ids, which
+ * can be less than MAX_NUMNODES.
+ */
+ cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
+ nr_node_ids * sizeof(struct kmem_list3 *);
+#if DEBUG
+ cache_cache.obj_size = cache_cache.buffer_size;
+#endif
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
cache_line_size());
cache_cache.reciprocal_buffer_size =
@@ -1760,7 +1780,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
char *realobj;
if (cachep->flags & SLAB_RED_ZONE) {
- printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
+ printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
*dbg_redzone1(cachep, objp),
*dbg_redzone2(cachep, objp));
}
@@ -1929,7 +1949,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
* For setting up all the kmem_list3s for cache whose buffer_size is same as
* size of kmem_list3.
*/
-static void set_up_list3s(struct kmem_cache *cachep, int index)
+static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
int node;
@@ -2151,13 +2171,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/
res = probe_kernel_address(pc->name, tmp);
if (res) {
- printk("SLAB: cache with size %d has lost its name\n",
+ printk(KERN_ERR
+ "SLAB: cache with size %d has lost its name\n",
pc->buffer_size);
continue;
}
if (!strcmp(pc->name, name)) {
- printk("kmem_cache_create: duplicate cache %s\n", name);
+ printk(KERN_ERR
+ "kmem_cache_create: duplicate cache %s\n", name);
dump_stack();
goto oops;
}
@@ -2165,12 +2187,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
#if DEBUG
WARN_ON(strchr(name, ' ')); /* It confuses parsers */
- if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
- /* No constructor, but inital state check requested */
- printk(KERN_ERR "%s: No con, but init state check "
- "requested - %s\n", __FUNCTION__, name);
- flags &= ~SLAB_DEBUG_INITIAL;
- }
#if FORCED_DEBUG
/*
* Enable redzoning and last user accounting, except for caches with
@@ -2227,7 +2243,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
* is greater than BYTES_PER_WORD.
*/
if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
- ralign = BYTES_PER_WORD;
+ ralign = __alignof__(unsigned long long);
/* 2) arch mandated alignment */
if (ralign < ARCH_SLAB_MINALIGN) {
@@ -2238,7 +2254,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
ralign = align;
}
/* disable debug if necessary */
- if (ralign > BYTES_PER_WORD)
+ if (ralign > __alignof__(unsigned long long))
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
/*
* 4) Store it.
@@ -2259,8 +2275,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/
if (flags & SLAB_RED_ZONE) {
/* add space for red zone words */
- cachep->obj_offset += BYTES_PER_WORD;
- size += 2 * BYTES_PER_WORD;
+ cachep->obj_offset += sizeof(unsigned long long);
+ size += 2 * sizeof(unsigned long long);
}
if (flags & SLAB_STORE_USER) {
/* user store requires one word storage behind the end of
@@ -2294,7 +2310,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
left_over = calculate_slab_order(cachep, size, align, flags);
if (!cachep->num) {
- printk("kmem_cache_create: couldn't create cache %s.\n", name);
+ printk(KERN_ERR
+ "kmem_cache_create: couldn't create cache %s.\n", name);
kmem_cache_free(&cache_cache, cachep);
cachep = NULL;
goto oops;
@@ -2733,19 +2750,10 @@ static int cache_grow(struct kmem_cache *cachep,
* Be lazy and only check for valid flags here, keeping it out of the
* critical path in kmem_cache_alloc().
*/
- BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW));
- if (flags & __GFP_NO_GROW)
- return 0;
+ BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK));
ctor_flags = SLAB_CTOR_CONSTRUCTOR;
local_flags = (flags & GFP_LEVEL_MASK);
- if (!(local_flags & __GFP_WAIT))
- /*
- * Not allowed to sleep. Need to tell a constructor about
- * this - it might need to know...
- */
- ctor_flags |= SLAB_CTOR_ATOMIC;
-
/* Take the l3 list lock to change the colour_next on this node */
check_irq_off();
l3 = cachep->nodelists[nodeid];
@@ -2829,7 +2837,7 @@ static void kfree_debugcheck(const void *objp)
static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
{
- unsigned long redzone1, redzone2;
+ unsigned long long redzone1, redzone2;
redzone1 = *dbg_redzone1(cache, obj);
redzone2 = *dbg_redzone2(cache, obj);
@@ -2845,7 +2853,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
else
slab_error(cache, "memory outside object was overwritten");
- printk(KERN_ERR "%p: redzone 1:0x%lx, redzone 2:0x%lx.\n",
+ printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
obj, redzone1, redzone2);
}
@@ -2858,7 +2866,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
objp -= obj_offset(cachep);
kfree_debugcheck(objp);
- page = virt_to_page(objp);
+ page = virt_to_head_page(objp);
slabp = page_get_slab(page);
@@ -2875,15 +2883,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
BUG_ON(objnr >= cachep->num);
BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
- if (cachep->flags & SLAB_DEBUG_INITIAL) {
- /*
- * Need to call the slab's constructor so the caller can
- * perform a verify of its state (debugging). Called without
- * the cache-lock held.
- */
- cachep->ctor(objp + obj_offset(cachep),
- cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY);
- }
if (cachep->flags & SLAB_POISON && cachep->dtor) {
/* we want to cache poison the object,
* call the destruction callback
@@ -2987,6 +2986,14 @@ retry:
slabp = list_entry(entry, struct slab, list);
check_slabp(cachep, slabp);
check_spinlock_acquired(cachep);
+
+ /*
+ * The slab was either on partial or free list so
+ * there must be at least one object available for
+ * allocation.
+ */
+ BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);
+
while (slabp->inuse < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
@@ -3062,7 +3069,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
slab_error(cachep, "double free, or memory outside"
" object was overwritten");
printk(KERN_ERR
- "%p: redzone 1:0x%lx, redzone 2:0x%lx\n",
+ "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
objp, *dbg_redzone1(cachep, objp),
*dbg_redzone2(cachep, objp));
}
@@ -3074,20 +3081,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
struct slab *slabp;
unsigned objnr;
- slabp = page_get_slab(virt_to_page(objp));
+ slabp = page_get_slab(virt_to_head_page(objp));
objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
}
#endif
objp += obj_offset(cachep);
- if (cachep->ctor && cachep->flags & SLAB_POISON) {
- unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR;
-
- if (!(flags & __GFP_WAIT))
- ctor_flags |= SLAB_CTOR_ATOMIC;
-
- cachep->ctor(objp, cachep, ctor_flags);
- }
+ if (cachep->ctor && cachep->flags & SLAB_POISON)
+ cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR);
#if ARCH_SLAB_MINALIGN
if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
@@ -3142,7 +3143,7 @@ static int __init failslab_debugfs(void)
struct dentry *dir;
int err;
- err = init_fault_attr_dentries(&failslab.attr, "failslab");
+ err = init_fault_attr_dentries(&failslab.attr, "failslab");
if (err)
return err;
dir = failslab.attr.dentries.dir;
@@ -3180,9 +3181,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
check_irq_off();
- if (should_failslab(cachep, flags))
- return NULL;
-
ac = cpu_cache_get(cachep);
if (likely(ac->avail)) {
STATS_INC_ALLOCHIT(cachep);
@@ -3256,7 +3254,7 @@ retry:
flags | GFP_THISNODE, nid);
}
- if (!obj && !(flags & __GFP_NO_GROW)) {
+ if (!obj) {
/*
* This allocation will be performed within the constraints
* of the current cpuset / memory policy requirements.
@@ -3374,6 +3372,9 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
unsigned long save_flags;
void *ptr;
+ if (should_failslab(cachep, flags))
+ return NULL;
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
@@ -3444,6 +3445,9 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
unsigned long save_flags;
void *objp;
+ if (should_failslab(cachep, flags))
+ return NULL;
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
objp = __do_cache_alloc(cachep, flags);
@@ -3563,7 +3567,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
check_irq_off();
objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
- if (cache_free_alien(cachep, objp))
+ if (use_alien_caches && cache_free_alien(cachep, objp))
return;
if (likely(ac->avail < ac->limit)) {
@@ -3737,6 +3741,53 @@ EXPORT_SYMBOL(__kmalloc);
#endif
/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ *
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes. If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc(). If @size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ struct kmem_cache *cache, *new_cache;
+ void *ret;
+
+ if (unlikely(!p))
+ return kmalloc_track_caller(new_size, flags);
+
+ if (unlikely(!new_size)) {
+ kfree(p);
+ return NULL;
+ }
+
+ cache = virt_to_cache(p);
+ new_cache = __find_general_cachep(new_size, flags);
+
+ /*
+ * If new size fits in the current cache, bail out.
+ */
+ if (likely(cache == new_cache))
+ return (void *)p;
+
+ /*
+ * We are on the slow-path here so do not use __cache_alloc
+ * because it bloats kernel text.
+ */
+ ret = kmalloc_track_caller(new_size, flags);
+ if (ret) {
+ memcpy(ret, p, min(new_size, ksize(p)));
+ kfree(p);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(krealloc);
+
+/**
* kmem_cache_free - Deallocate an object
* @cachep: The cache the allocation was from.
* @objp: The previously allocated object.
@@ -3812,12 +3863,15 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
goto fail;
}
- new_shared = alloc_arraycache(node,
+ new_shared = NULL;
+ if (cachep->shared) {
+ new_shared = alloc_arraycache(node,
cachep->shared*cachep->batchcount,
0xbaadf00d);
- if (!new_shared) {
- free_alien_cache(new_alien);
- goto fail;
+ if (!new_shared) {
+ free_alien_cache(new_alien);
+ goto fail;
+ }
}
l3 = cachep->nodelists[node];
@@ -3975,10 +4029,8 @@ static int enable_cpucache(struct kmem_cache *cachep)
* to a larger limit. Thus disabled by default.
*/
shared = 0;
-#ifdef CONFIG_SMP
- if (cachep->buffer_size <= PAGE_SIZE)
+ if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
shared = 8;
-#endif
#if DEBUG
/*
@@ -4380,16 +4432,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
- char *modname;
- const char *name;
unsigned long offset, size;
- char namebuf[KSYM_NAME_LEN+1];
-
- name = kallsyms_lookup(address, &size, &offset, &modname, namebuf);
+ char modname[MODULE_NAME_LEN + 1], name[KSYM_NAME_LEN + 1];
- if (name) {
+ if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
- if (modname)
+ if (modname[0])
seq_printf(m, " [%s]", modname);
return;
}
@@ -4478,7 +4526,7 @@ const struct seq_operations slabstats_op = {
* allocated with either kmalloc() or kmem_cache_alloc(). The object
* must not be freed during the duration of the call.
*/
-unsigned int ksize(const void *objp)
+size_t ksize(const void *objp)
{
if (unlikely(objp == NULL))
return 0;