summary refs log tree commit diff stats
path: root/mm/slab.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- mm/slab.c 596
1 files changed, 266 insertions, 330 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 9ca3b87edab..a467b308c68 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
unsigned int limit;
unsigned int batchcount;
unsigned int touched;
- spinlock_t lock;
void *entry[]; /*
* Must have this definition in here for the proper
* alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
*/
};
+/*
+ * An array_cache bundled with its own spinlock (struct array_cache itself
+ * no longer carries a lock). reap_alien(), drain_alien_cache() and
+ * cache_free_alien() hold ->lock while they drain or add objects to ->ac.
+ * ->ac ends in the flexible entry[] array, so the object pointers are stored
+ * directly behind this structure; __alloc_alien_cache() sizes the
+ * allocation accordingly.
+ */
+struct alien_cache {
+ spinlock_t lock;
+ struct array_cache ac;
+};
+
#define SLAB_OBJ_PFMEMALLOC 1
static inline bool is_obj_pfmemalloc(void *objp)
{
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
static int drain_freelist(struct kmem_cache *cache,
struct kmem_cache_node *n, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
- int node);
+ int node, struct list_head *list);
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
#define MAKE_LIST(cachep, listp, slab, nodeid) \
do { \
INIT_LIST_HEAD(listp); \
- list_splice(&(cachep->node[nodeid]->slab), listp); \
+ list_splice(&get_node(cachep, nodeid)->slab, listp); \
} while (0)
#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -386,6 +391,39 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#endif
+#define OBJECT_FREE (0)
+#define OBJECT_ACTIVE (1)
+
+#ifdef CONFIG_DEBUG_SLAB_LEAK
+
+/*
+ * Record @val (the callers pass OBJECT_FREE or OBJECT_ACTIVE) as the status
+ * of object @idx in the slab @page. The status bytes start right behind the
+ * cachep->num freelist_idx_t entries that begin at page->freelist.
+ */
+static void set_obj_status(struct page *page, int idx, int val)
+{
+	struct kmem_cache *cache = page->slab_cache;
+	int idx_bytes = cache->num * sizeof(freelist_idx_t);
+	char *state = (char *)page->freelist;
+
+	/* step over the index array to reach the per-object status bytes */
+	state += idx_bytes;
+	state[idx] = val;
+}
+
+/*
+ * Return the status byte stored for object @idx in the slab @page. The
+ * status bytes start right behind the cachep->num freelist_idx_t entries
+ * that begin at page->freelist.
+ */
+static inline unsigned int get_obj_status(struct page *page, int idx)
+{
+	struct kmem_cache *cache = page->slab_cache;
+	int idx_bytes = cache->num * sizeof(freelist_idx_t);
+	char *state = (char *)page->freelist;
+
+	/* step over the index array to reach the per-object status bytes */
+	state += idx_bytes;
+
+	return state[idx];
+}
+
+#else
+static inline void set_obj_status(struct page *page, int idx, int val) {}
+
+#endif
+
/*
* Do not go above this order unless 0 objects fit into the slab or
* overridden on the command line.
@@ -434,154 +472,37 @@ static struct kmem_cache kmem_cache_boot = {
#define BAD_ALIEN_MAGIC 0x01020304ul
-#ifdef CONFIG_LOCKDEP
-
-/*
- * Slab sometimes uses the kmalloc slabs to store the slab headers
- * for other slabs "off slab".
- * The locking for this is tricky in that it nests within the locks
- * of all other slabs in a few places; to deal with this special
- * locking we put on-slab caches into a separate lock-class.
- *
- * We set lock class for alien array caches which are up during init.
- * The lock annotation will be lost if all cpus of a node goes down and
- * then comes back up during hotplug
- */
-static struct lock_class_key on_slab_l3_key;
-static struct lock_class_key on_slab_alc_key;
-
-static struct lock_class_key debugobj_l3_key;
-static struct lock_class_key debugobj_alc_key;
-
-static void slab_set_lock_classes(struct kmem_cache *cachep,
- struct lock_class_key *l3_key, struct lock_class_key *alc_key,
- int q)
-{
- struct array_cache **alc;
- struct kmem_cache_node *n;
- int r;
-
- n = cachep->node[q];
- if (!n)
- return;
-
- lockdep_set_class(&n->list_lock, l3_key);
- alc = n->alien;
- /*
- * FIXME: This check for BAD_ALIEN_MAGIC
- * should go away when common slab code is taught to
- * work even without alien caches.
- * Currently, non NUMA code returns BAD_ALIEN_MAGIC
- * for alloc_alien_cache,
- */
- if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
- return;
- for_each_node(r) {
- if (alc[r])
- lockdep_set_class(&alc[r]->lock, alc_key);
- }
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
- slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
- int node;
-
- for_each_online_node(node)
- slab_set_debugobj_lock_classes_node(cachep, node);
-}
-
-static void init_node_lock_keys(int q)
-{
- int i;
-
- if (slab_state < UP)
- return;
-
- for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
- struct kmem_cache_node *n;
- struct kmem_cache *cache = kmalloc_caches[i];
-
- if (!cache)
- continue;
-
- n = cache->node[q];
- if (!n || OFF_SLAB(cache))
- continue;
-
- slab_set_lock_classes(cache, &on_slab_l3_key,
- &on_slab_alc_key, q);
- }
-}
-
-static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
-{
- if (!cachep->node[q])
- return;
-
- slab_set_lock_classes(cachep, &on_slab_l3_key,
- &on_slab_alc_key, q);
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
- int node;
-
- VM_BUG_ON(OFF_SLAB(cachep));
- for_each_node(node)
- on_slab_lock_classes_node(cachep, node);
-}
-
-static inline void init_lock_keys(void)
-{
- int node;
-
- for_each_node(node)
- init_node_lock_keys(node);
-}
-#else
-static void init_node_lock_keys(int q)
-{
-}
+static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
-static inline void init_lock_keys(void)
+static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
+ return cachep->array[smp_processor_id()];
}
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+static size_t calculate_freelist_size(int nr_objs, size_t align)
{
-}
+ size_t freelist_size;
-static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
+ freelist_size = nr_objs * sizeof(freelist_idx_t);
+ if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
+ freelist_size += nr_objs * sizeof(char);
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
+ if (align)
+ freelist_size = ALIGN(freelist_size, align);
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-}
-#endif
-
-static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
-
-static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
-{
- return cachep->array[smp_processor_id()];
+ return freelist_size;
}
static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
size_t idx_size, size_t align)
{
int nr_objs;
+ size_t remained_size;
size_t freelist_size;
+ int extra_space = 0;
+ if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
+ extra_space = sizeof(char);
/*
* Ignore padding for the initial guess. The padding
* is at most @align-1 bytes, and @buffer_size is at
@@ -590,14 +511,15 @@ static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
* into the memory allocation when taking the padding
* into account.
*/
- nr_objs = slab_size / (buffer_size + idx_size);
+ nr_objs = slab_size / (buffer_size + idx_size + extra_space);
/*
* This calculated number will be either the right
* amount, or one greater than what we want.
*/
- freelist_size = slab_size - nr_objs * buffer_size;
- if (freelist_size < ALIGN(nr_objs * idx_size, align))
+ remained_size = slab_size - nr_objs * buffer_size;
+ freelist_size = calculate_freelist_size(nr_objs, align);
+ if (remained_size < freelist_size)
nr_objs--;
return nr_objs;
@@ -635,7 +557,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
} else {
nr_objs = calculate_nr_objs(slab_size, buffer_size,
sizeof(freelist_idx_t), align);
- mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align);
+ mgmt_size = calculate_freelist_size(nr_objs, align);
}
*num = nr_objs;
*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
@@ -740,13 +662,8 @@ static void start_cpu_timer(int cpu)
}
}
-static struct array_cache *alloc_arraycache(int node, int entries,
- int batchcount, gfp_t gfp)
+static void init_arraycache(struct array_cache *ac, int limit, int batch)
{
- int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
- struct array_cache *nc = NULL;
-
- nc = kmalloc_node(memsize, gfp, node);
/*
* The array_cache structures contain pointers to free object.
* However, when such objects are allocated or transferred to another
@@ -754,15 +671,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
* valid references during a kmemleak scan. Therefore, kmemleak must
* not scan such objects.
*/
- kmemleak_no_scan(nc);
- if (nc) {
- nc->avail = 0;
- nc->limit = entries;
- nc->batchcount = batchcount;
- nc->touched = 0;
- spin_lock_init(&nc->lock);
+ kmemleak_no_scan(ac);
+ if (ac) {
+ ac->avail = 0;
+ ac->limit = limit;
+ ac->batchcount = batch;
+ ac->touched = 0;
}
- return nc;
+}
+
+static struct array_cache *alloc_arraycache(int node, int entries,
+ int batchcount, gfp_t gfp)
+{
+ size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
+ struct array_cache *ac = NULL;
+
+ ac = kmalloc_node(memsize, gfp, node);
+ init_arraycache(ac, entries, batchcount);
+ return ac;
}
static inline bool is_slab_pfmemalloc(struct page *page)
@@ -774,7 +700,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
struct array_cache *ac)
{
- struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+ struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
struct page *page;
unsigned long flags;
@@ -829,7 +755,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
* If there are empty slabs on the slabs_free list and we are
* being forced to refill the cache, mark this one !pfmemalloc.
*/
- n = cachep->node[numa_mem_id()];
+ n = get_node(cachep, numa_mem_id());
if (!list_empty(&n->slabs_free) && force_refill) {
struct page *page = virt_to_head_page(objp);
ClearPageSlabPfmemalloc(page);
@@ -909,12 +835,13 @@ static int transfer_objects(struct array_cache *to,
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, n) do { } while (0)
-static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static inline struct alien_cache **alloc_alien_cache(int node,
+ int limit, gfp_t gfp)
{
- return (struct array_cache **)BAD_ALIEN_MAGIC;
+ return (struct alien_cache **)BAD_ALIEN_MAGIC;
}
-static inline void free_alien_cache(struct array_cache **ac_ptr)
+static inline void free_alien_cache(struct alien_cache **ac_ptr)
{
}
@@ -940,46 +867,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
-static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+/*
+ * Allocate one alien cache with room for @entries object pointers, using
+ * kmalloc_node() on @node, and initialise its array_cache and lock.
+ *
+ * Returns the new cache, or NULL if the allocation fails. The NULL check
+ * has to come before any initialisation: ->lock precedes ->ac in struct
+ * alien_cache, so &alc->ac is a non-NULL address even when alc is NULL,
+ * which defeats the NULL test inside init_arraycache(), and
+ * spin_lock_init(&alc->lock) would then write through the NULL pointer.
+ */
+static struct alien_cache *__alloc_alien_cache(int node, int entries,
+						int batch, gfp_t gfp)
+{
+	size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
+	struct alien_cache *alc;
+
+	alc = kmalloc_node(memsize, gfp, node);
+	if (!alc)
+		return NULL;
+
+	init_arraycache(&alc->ac, entries, batch);
+	spin_lock_init(&alc->lock);
+	return alc;
+}
+
+static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
- struct array_cache **ac_ptr;
- int memsize = sizeof(void *) * nr_node_ids;
+ struct alien_cache **alc_ptr;
+ size_t memsize = sizeof(void *) * nr_node_ids;
int i;
if (limit > 1)
limit = 12;
- ac_ptr = kzalloc_node(memsize, gfp, node);
- if (ac_ptr) {
- for_each_node(i) {
- if (i == node || !node_online(i))
- continue;
- ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
- if (!ac_ptr[i]) {
- for (i--; i >= 0; i--)
- kfree(ac_ptr[i]);
- kfree(ac_ptr);
- return NULL;
- }
+ alc_ptr = kzalloc_node(memsize, gfp, node);
+ if (!alc_ptr)
+ return NULL;
+
+ for_each_node(i) {
+ if (i == node || !node_online(i))
+ continue;
+ alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
+ if (!alc_ptr[i]) {
+ for (i--; i >= 0; i--)
+ kfree(alc_ptr[i]);
+ kfree(alc_ptr);
+ return NULL;
}
}
- return ac_ptr;
+ return alc_ptr;
}
-static void free_alien_cache(struct array_cache **ac_ptr)
+static void free_alien_cache(struct alien_cache **alc_ptr)
{
int i;
- if (!ac_ptr)
+ if (!alc_ptr)
return;
for_each_node(i)
- kfree(ac_ptr[i]);
- kfree(ac_ptr);
+ kfree(alc_ptr[i]);
+ kfree(alc_ptr);
}
static void __drain_alien_cache(struct kmem_cache *cachep,
- struct array_cache *ac, int node)
+ struct array_cache *ac, int node,
+ struct list_head *list)
{
- struct kmem_cache_node *n = cachep->node[node];
+ struct kmem_cache_node *n = get_node(cachep, node);
if (ac->avail) {
spin_lock(&n->list_lock);
@@ -991,7 +932,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
if (n->shared)
transfer_objects(n->shared, ac, ac->limit);
- free_block(cachep, ac->entry, ac->avail, node);
+ free_block(cachep, ac->entry, ac->avail, node, list);
ac->avail = 0;
spin_unlock(&n->list_lock);
}
@@ -1005,28 +946,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
int node = __this_cpu_read(slab_reap_node);
if (n->alien) {
- struct array_cache *ac = n->alien[node];
+ struct alien_cache *alc = n->alien[node];
+ struct array_cache *ac;
+
+ if (alc) {
+ ac = &alc->ac;
+ if (ac->avail && spin_trylock_irq(&alc->lock)) {
+ LIST_HEAD(list);
- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
- __drain_alien_cache(cachep, ac, node);
- spin_unlock_irq(&ac->lock);
+ __drain_alien_cache(cachep, ac, node, &list);
+ spin_unlock_irq(&alc->lock);
+ slabs_destroy(cachep, &list);
+ }
}
}
}
static void drain_alien_cache(struct kmem_cache *cachep,
- struct array_cache **alien)
+ struct alien_cache **alien)
{
int i = 0;
+ struct alien_cache *alc;
struct array_cache *ac;
unsigned long flags;
for_each_online_node(i) {
- ac = alien[i];
- if (ac) {
- spin_lock_irqsave(&ac->lock, flags);
- __drain_alien_cache(cachep, ac, i);
- spin_unlock_irqrestore(&ac->lock, flags);
+ alc = alien[i];
+ if (alc) {
+ LIST_HEAD(list);
+
+ ac = &alc->ac;
+ spin_lock_irqsave(&alc->lock, flags);
+ __drain_alien_cache(cachep, ac, i, &list);
+ spin_unlock_irqrestore(&alc->lock, flags);
+ slabs_destroy(cachep, &list);
}
}
}
@@ -1035,8 +988,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
int nodeid = page_to_nid(virt_to_page(objp));
struct kmem_cache_node *n;
- struct array_cache *alien = NULL;
+ struct alien_cache *alien = NULL;
+ struct array_cache *ac;
int node;
+ LIST_HEAD(list);
node = numa_mem_id();
@@ -1047,21 +1002,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
if (likely(nodeid == node))
return 0;
- n = cachep->node[node];
+ n = get_node(cachep, node);
STATS_INC_NODEFREES(cachep);
if (n->alien && n->alien[nodeid]) {
alien = n->alien[nodeid];
+ ac = &alien->ac;
spin_lock(&alien->lock);
- if (unlikely(alien->avail == alien->limit)) {
+ if (unlikely(ac->avail == ac->limit)) {
STATS_INC_ACOVERFLOW(cachep);
- __drain_alien_cache(cachep, alien, nodeid);
+ __drain_alien_cache(cachep, ac, nodeid, &list);
}
- ac_put_obj(cachep, alien, objp);
+ ac_put_obj(cachep, ac, objp);
spin_unlock(&alien->lock);
+ slabs_destroy(cachep, &list);
} else {
- spin_lock(&(cachep->node[nodeid])->list_lock);
- free_block(cachep, &objp, 1, nodeid);
- spin_unlock(&(cachep->node[nodeid])->list_lock);
+ n = get_node(cachep, nodeid);
+ spin_lock(&n->list_lock);
+ free_block(cachep, &objp, 1, nodeid, &list);
+ spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
}
return 1;
}
@@ -1080,7 +1039,7 @@ static int init_cache_node_node(int node)
{
struct kmem_cache *cachep;
struct kmem_cache_node *n;
- const int memsize = sizeof(struct kmem_cache_node);
+ const size_t memsize = sizeof(struct kmem_cache_node);
list_for_each_entry(cachep, &slab_caches, list) {
/*
@@ -1088,7 +1047,8 @@ static int init_cache_node_node(int node)
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
- if (!cachep->node[node]) {
+ n = get_node(cachep, node);
+ if (!n) {
n = kmalloc_node(memsize, GFP_KERNEL, node);
if (!n)
return -ENOMEM;
@@ -1104,11 +1064,11 @@ static int init_cache_node_node(int node)
cachep->node[node] = n;
}
- spin_lock_irq(&cachep->node[node]->list_lock);
- cachep->node[node]->free_limit =
+ spin_lock_irq(&n->list_lock);
+ n->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->node[node]->list_lock);
+ spin_unlock_irq(&n->list_lock);
}
return 0;
}
@@ -1129,12 +1089,13 @@ static void cpuup_canceled(long cpu)
list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared;
- struct array_cache **alien;
+ struct alien_cache **alien;
+ LIST_HEAD(list);
/* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu];
cachep->array[cpu] = NULL;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
goto free_array_cache;
@@ -1144,7 +1105,7 @@ static void cpuup_canceled(long cpu)
/* Free limit for this kmem_cache_node */
n->free_limit -= cachep->batchcount;
if (nc)
- free_block(cachep, nc->entry, nc->avail, node);
+ free_block(cachep, nc->entry, nc->avail, node, &list);
if (!cpumask_empty(mask)) {
spin_unlock_irq(&n->list_lock);
@@ -1154,7 +1115,7 @@ static void cpuup_canceled(long cpu)
shared = n->shared;
if (shared) {
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, &list);
n->shared = NULL;
}
@@ -1169,6 +1130,7 @@ static void cpuup_canceled(long cpu)
free_alien_cache(alien);
}
free_array_cache:
+ slabs_destroy(cachep, &list);
kfree(nc);
}
/*
@@ -1177,7 +1139,7 @@ free_array_cache:
* shrink each nodelist to its limit.
*/
list_for_each_entry(cachep, &slab_caches, list) {
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1208,7 +1170,7 @@ static int cpuup_prepare(long cpu)
list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared = NULL;
- struct array_cache **alien = NULL;
+ struct alien_cache **alien = NULL;
nc = alloc_arraycache(node, cachep->limit,
cachep->batchcount, GFP_KERNEL);
@@ -1232,7 +1194,7 @@ static int cpuup_prepare(long cpu)
}
}
cachep->array[cpu] = nc;
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(!n);
spin_lock_irq(&n->list_lock);
@@ -1253,13 +1215,7 @@ static int cpuup_prepare(long cpu)
spin_unlock_irq(&n->list_lock);
kfree(shared);
free_alien_cache(alien);
- if (cachep->flags & SLAB_DEBUG_OBJECTS)
- slab_set_debugobj_lock_classes_node(cachep, node);
- else if (!OFF_SLAB(cachep) &&
- !(cachep->flags & SLAB_DESTROY_BY_RCU))
- on_slab_lock_classes_node(cachep, node);
}
- init_node_lock_keys(node);
return 0;
bad:
@@ -1343,7 +1299,7 @@ static int __meminit drain_cache_node_node(int node)
list_for_each_entry(cachep, &slab_caches, list) {
struct kmem_cache_node *n;
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (!n)
continue;
@@ -1523,10 +1479,6 @@ void __init kmem_cache_init(void)
memcpy(ptr, cpu_cache_get(kmem_cache),
sizeof(struct arraycache_init));
- /*
- * Do not assume that spinlocks can be initialized via memcpy:
- */
- spin_lock_init(&ptr->lock);
kmem_cache->array[smp_processor_id()] = ptr;
@@ -1536,10 +1488,6 @@ void __init kmem_cache_init(void)
!= &initarray_generic.cache);
memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
sizeof(struct arraycache_init));
- /*
- * Do not assume that spinlocks can be initialized via memcpy:
- */
- spin_lock_init(&ptr->lock);
kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
}
@@ -1576,9 +1524,6 @@ void __init kmem_cache_init_late(void)
BUG();
mutex_unlock(&slab_mutex);
- /* Annotate slab for lockdep -- annotate the malloc caches */
- init_lock_keys();
-
/* Done! */
slab_state = FULL;
@@ -1638,14 +1583,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
cachep->name, cachep->size, cachep->gfporder);
- for_each_online_node(node) {
+ for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
- n = cachep->node[node];
- if (!n)
- continue;
-
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->slabs_full, lru) {
active_objs += cachep->num;
@@ -1672,7 +1613,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
}
/*
- * Interface to system's page allocator. No need to hold the cache-lock.
+ * Interface to system's page allocator. No need to hold the
+ * kmem_cache_node ->list_lock.
*
* If we requested dmaable memory, we will get it. Even if we
* did not request dmaable memory, we might get it, but that
@@ -1974,9 +1916,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
* @cachep: cache pointer being destroyed
* @page: page pointer being destroyed
*
- * Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache. The
- * cache-lock is not held/needed.
+ * Destroy all the objs in a slab page, and release the mem back to the system.
+ * Before calling the slab page must have been unlinked from the cache. The
+ * kmem_cache_node ->list_lock is not held/needed.
*/
static void slab_destroy(struct kmem_cache *cachep, struct page *page)
{
@@ -2008,6 +1950,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
kmem_cache_free(cachep->freelist_cache, freelist);
}
+/*
+ * Unlink every slab page queued on @list and pass it to slab_destroy().
+ * The callers visible in this patch invoke it only after releasing the
+ * locks they held while free_block() was filling @list.
+ */
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
+{
+	struct page *slab, *next;
+
+	/* _safe iteration: the current entry is removed inside the loop */
+	list_for_each_entry_safe(slab, next, list, lru) {
+		list_del(&slab->lru);
+		slab_destroy(cachep, slab);
+	}
+}
+
/**
* calculate_slab_order - calculate size (page order) of slabs
* @cachep: pointer to the cache that is being created
@@ -2041,13 +1993,16 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
break;
if (flags & CFLGS_OFF_SLAB) {
+ size_t freelist_size_per_obj = sizeof(freelist_idx_t);
/*
* Max number of objs-per-slab for caches which
* use off-slab slabs. Needed to avoid a possible
* looping condition in cache_grow().
*/
+ if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
+ freelist_size_per_obj += sizeof(char);
offslab_limit = size;
- offslab_limit /= sizeof(freelist_idx_t);
+ offslab_limit /= freelist_size_per_obj;
if (num > offslab_limit)
break;
@@ -2294,8 +2249,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (!cachep->num)
return -E2BIG;
- freelist_size =
- ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align);
+ freelist_size = calculate_freelist_size(cachep->num, cachep->align);
/*
* If the slab has been placed off-slab, and we have enough space then
@@ -2308,7 +2262,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
- freelist_size = cachep->num * sizeof(freelist_idx_t);
+ freelist_size = calculate_freelist_size(cachep->num, 0);
#ifdef CONFIG_PAGE_POISONING
/* If we're going to use the generic kernel_map_pages()
@@ -2351,17 +2305,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
return err;
}
- if (flags & SLAB_DEBUG_OBJECTS) {
- /*
- * Would deadlock through slab_destroy()->call_rcu()->
- * debug_object_activate()->kmem_cache_alloc().
- */
- WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
-
- slab_set_debugobj_lock_classes(cachep);
- } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
- on_slab_lock_classes(cachep);
-
return 0;
}
@@ -2380,7 +2323,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
+ assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}
@@ -2388,7 +2331,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->node[node]->list_lock);
+ assert_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}
@@ -2408,12 +2351,16 @@ static void do_drain(void *arg)
struct kmem_cache *cachep = arg;
struct array_cache *ac;
int node = numa_mem_id();
+ struct kmem_cache_node *n;
+ LIST_HEAD(list);
check_irq_off();
ac = cpu_cache_get(cachep);
- spin_lock(&cachep->node[node]->list_lock);
- free_block(cachep, ac->entry, ac->avail, node);
- spin_unlock(&cachep->node[node]->list_lock);
+ n = get_node(cachep, node);
+ spin_lock(&n->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node, &list);
+ spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
ac->avail = 0;
}
@@ -2424,17 +2371,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
on_each_cpu(do_drain, cachep, 1);
check_irq_on();
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n && n->alien)
+ for_each_kmem_cache_node(cachep, node, n)
+ if (n->alien)
drain_alien_cache(cachep, n->alien);
- }
- for_each_online_node(node) {
- n = cachep->node[node];
- if (n)
- drain_array(cachep, n, n->shared, 1, node);
- }
+ for_each_kmem_cache_node(cachep, node, n)
+ drain_array(cachep, n, n->shared, 1, node);
}
/*
@@ -2480,17 +2422,14 @@ out:
int __kmem_cache_shrink(struct kmem_cache *cachep)
{
- int ret = 0, i = 0;
+ int ret = 0;
+ int node;
struct kmem_cache_node *n;
drain_cpu_caches(cachep);
check_irq_on();
- for_each_online_node(i) {
- n = cachep->node[i];
- if (!n)
- continue;
-
+ for_each_kmem_cache_node(cachep, node, n) {
drain_freelist(cachep, n, slabs_tofree(cachep, n));
ret += !list_empty(&n->slabs_full) ||
@@ -2512,13 +2451,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
kfree(cachep->array[i]);
/* NUMA: free the node structures */
- for_each_online_node(i) {
- n = cachep->node[i];
- if (n) {
- kfree(n->shared);
- free_alien_cache(n->alien);
- kfree(n);
- }
+ for_each_kmem_cache_node(cachep, i, n) {
+ kfree(n->shared);
+ free_alien_cache(n->alien);
+ kfree(n);
+ cachep->node[i] = NULL;
}
return 0;
}
@@ -2612,6 +2549,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
if (cachep->ctor)
cachep->ctor(objp);
#endif
+ set_obj_status(page, i, OBJECT_FREE);
set_free_obj(page, i, i);
}
}
@@ -2696,7 +2634,7 @@ static int cache_grow(struct kmem_cache *cachep,
/* Take the node list lock to change the colour_next on this node */
check_irq_off();
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
spin_lock(&n->list_lock);
/* Get colour for the slab, and cal the next value. */
@@ -2820,6 +2758,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
BUG_ON(objnr >= cachep->num);
BUG_ON(objp != index_to_obj(cachep, page, objnr));
+ set_obj_status(page, objnr, OBJECT_FREE);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
@@ -2864,7 +2803,7 @@ retry:
*/
batchcount = BATCHREFILL_LIMIT;
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
BUG_ON(ac->avail > 0 || !n);
spin_lock(&n->list_lock);
@@ -2953,6 +2892,8 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
gfp_t flags, void *objp, unsigned long caller)
{
+ struct page *page;
+
if (!objp)
return objp;
if (cachep->flags & SLAB_POISON) {
@@ -2983,6 +2924,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
*dbg_redzone1(cachep, objp) = RED_ACTIVE;
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
}
+
+ page = virt_to_head_page(objp);
+ set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE);
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON)
cachep->ctor(objp);
@@ -2999,7 +2943,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
{
- if (cachep == kmem_cache)
+ if (unlikely(cachep == kmem_cache))
return false;
return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3108,8 +3052,8 @@ retry:
nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(zone, flags) &&
- cache->node[nid] &&
- cache->node[nid]->free_objects) {
+ get_node(cache, nid) &&
+ get_node(cache, nid)->free_objects) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
if (obj)
@@ -3172,7 +3116,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int x;
VM_BUG_ON(nodeid > num_online_nodes());
- n = cachep->node[nodeid];
+ n = get_node(cachep, nodeid);
BUG_ON(!n);
retry:
@@ -3243,7 +3187,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
- if (unlikely(!cachep->node[nodeid])) {
+ if (unlikely(!get_node(cachep, nodeid))) {
/* Node not bootstrapped yet */
ptr = fallback_alloc(cachep, flags);
goto out;
@@ -3344,12 +3288,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
/*
* Caller needs to acquire correct kmem_cache_node's list_lock
+ * @list: detached free slabs are queued here; the caller must destroy them
*/
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
- int node)
+static void free_block(struct kmem_cache *cachep, void **objpp,
+ int nr_objects, int node, struct list_head *list)
{
int i;
- struct kmem_cache_node *n;
+ struct kmem_cache_node *n = get_node(cachep, node);
for (i = 0; i < nr_objects; i++) {
void *objp;
@@ -3359,7 +3304,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
objp = objpp[i];
page = virt_to_head_page(objp);
- n = cachep->node[node];
list_del(&page->lru);
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp, node);
@@ -3370,13 +3314,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
if (page->active == 0) {
if (n->free_objects > n->free_limit) {
n->free_objects -= cachep->num;
- /* No need to drop any previously held
- * lock here, even if we have a off-slab slab
- * descriptor it is guaranteed to come from
- * a different cache, refer to comments before
- * alloc_slabmgmt.
- */
- slab_destroy(cachep, page);
+ list_add_tail(&page->lru, list);
} else {
list_add(&page->lru, &n->slabs_free);
}
@@ -3395,13 +3333,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
int batchcount;
struct kmem_cache_node *n;
int node = numa_mem_id();
+ LIST_HEAD(list);
batchcount = ac->batchcount;
#if DEBUG
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
- n = cachep->node[node];
+ n = get_node(cachep, node);
spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
@@ -3416,7 +3355,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
}
}
- free_block(cachep, ac->entry, batchcount, node);
+ free_block(cachep, ac->entry, batchcount, node, &list);
free_done:
#if STATS
{
@@ -3437,6 +3376,7 @@ free_done:
}
#endif
spin_unlock(&n->list_lock);
+ slabs_destroy(cachep, &list);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
@@ -3693,7 +3633,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
int node;
struct kmem_cache_node *n;
struct array_cache *new_shared;
- struct array_cache **new_alien = NULL;
+ struct alien_cache **new_alien = NULL;
for_each_online_node(node) {
@@ -3714,15 +3654,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
}
}
- n = cachep->node[node];
+ n = get_node(cachep, node);
if (n) {
struct array_cache *shared = n->shared;
+ LIST_HEAD(list);
spin_lock_irq(&n->list_lock);
if (shared)
free_block(cachep, shared->entry,
- shared->avail, node);
+ shared->avail, node, &list);
n->shared = new_shared;
if (!n->alien) {
@@ -3732,6 +3673,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -3759,9 +3701,8 @@ fail:
/* Cache is not active yet. Roll back what we did */
node--;
while (node >= 0) {
- if (cachep->node[node]) {
- n = cachep->node[node];
-
+ n = get_node(cachep, node);
+ if (n) {
kfree(n->shared);
free_alien_cache(n->alien);
kfree(n);
@@ -3822,12 +3763,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
cachep->shared = shared;
for_each_online_cpu(i) {
+ LIST_HEAD(list);
struct array_cache *ccold = new->new[i];
+ int node;
+ struct kmem_cache_node *n;
+
if (!ccold)
continue;
- spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
- free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+ node = cpu_to_mem(i);
+ n = get_node(cachep, node);
+ spin_lock_irq(&n->list_lock);
+ free_block(cachep, ccold->entry, ccold->avail, node, &list);
+ spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
kfree(ccold);
}
kfree(new);
@@ -3935,6 +3884,7 @@ skip_setup:
static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac, int force, int node)
{
+ LIST_HEAD(list);
int tofree;
if (!ac || !ac->avail)
@@ -3947,12 +3897,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
tofree = (ac->avail + 1) / 2;
- free_block(cachep, ac->entry, tofree, node);
+ free_block(cachep, ac->entry, tofree, node, &list);
ac->avail -= tofree;
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
spin_unlock_irq(&n->list_lock);
+ slabs_destroy(cachep, &list);
}
}
@@ -3987,7 +3938,7 @@ static void cache_reap(struct work_struct *w)
* have established with reasonable certainty that
* we can do some work if the lock was obtained.
*/
- n = searchp->node[node];
+ n = get_node(searchp, node);
reap_alien(searchp, n);
@@ -4039,10 +3990,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
active_objs = 0;
num_slabs = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);
@@ -4219,21 +4167,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c,
struct page *page)
{
void *p;
- int i, j;
+ int i;
if (n[0] == n[1])
return;
for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
- bool active = true;
-
- for (j = page->active; j < c->num; j++) {
- /* Skip freed item */
- if (get_free_obj(page, j) == i) {
- active = false;
- break;
- }
- }
- if (!active)
+ if (get_obj_status(page, i) != OBJECT_ACTIVE)
continue;
if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
@@ -4276,10 +4215,7 @@ static int leaks_show(struct seq_file *m, void *p)
x[1] = 0;
- for_each_online_node(node) {
- n = cachep->node[node];
- if (!n)
- continue;
+ for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
spin_lock_irq(&n->list_lock);