diff options
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 596 |
1 files changed, 266 insertions, 330 deletions
diff --git a/mm/slab.c b/mm/slab.c index 9ca3b87edab..a467b308c68 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -191,7 +191,6 @@ struct array_cache { unsigned int limit; unsigned int batchcount; unsigned int touched; - spinlock_t lock; void *entry[]; /* * Must have this definition in here for the proper * alignment of array_cache. Also simplifies accessing @@ -203,6 +202,11 @@ struct array_cache { */ }; +struct alien_cache { + spinlock_t lock; + struct array_cache ac; +}; + #define SLAB_OBJ_PFMEMALLOC 1 static inline bool is_obj_pfmemalloc(void *objp) { @@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS]; static int drain_freelist(struct kmem_cache *cache, struct kmem_cache_node *n, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, - int node); + int node, struct list_head *list); +static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list); static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); @@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) #define MAKE_LIST(cachep, listp, slab, nodeid) \ do { \ INIT_LIST_HEAD(listp); \ - list_splice(&(cachep->node[nodeid]->slab), listp); \ + list_splice(&get_node(cachep, nodeid)->slab, listp); \ } while (0) #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ @@ -386,6 +391,39 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) #endif +#define OBJECT_FREE (0) +#define OBJECT_ACTIVE (1) + +#ifdef CONFIG_DEBUG_SLAB_LEAK + +static void set_obj_status(struct page *page, int idx, int val) +{ + int freelist_size; + char *status; + struct kmem_cache *cachep = page->slab_cache; + + freelist_size = cachep->num * sizeof(freelist_idx_t); + status = (char *)page->freelist + freelist_size; + status[idx] = val; +} + +static inline unsigned int get_obj_status(struct page *page, int idx) +{ + int freelist_size; + char *status; + struct kmem_cache *cachep = page->slab_cache; + + freelist_size = cachep->num * sizeof(freelist_idx_t); + status = (char *)page->freelist + freelist_size; + + return status[idx]; +} + +#else +static inline void set_obj_status(struct page *page, int idx, int val) {} + +#endif + /* * Do not go above this order unless 0 objects fit into the slab or * overridden on the command line. @@ -434,154 +472,37 @@ static struct kmem_cache kmem_cache_boot = { #define BAD_ALIEN_MAGIC 0x01020304ul -#ifdef CONFIG_LOCKDEP - -/* - * Slab sometimes uses the kmalloc slabs to store the slab headers - * for other slabs "off slab". - * The locking for this is tricky in that it nests within the locks - * of all other slabs in a few places; to deal with this special - * locking we put on-slab caches into a separate lock-class. - * - * We set lock class for alien array caches which are up during init. - * The lock annotation will be lost if all cpus of a node goes down and - * then comes back up during hotplug - */ -static struct lock_class_key on_slab_l3_key; -static struct lock_class_key on_slab_alc_key; - -static struct lock_class_key debugobj_l3_key; -static struct lock_class_key debugobj_alc_key; - -static void slab_set_lock_classes(struct kmem_cache *cachep, - struct lock_class_key *l3_key, struct lock_class_key *alc_key, - int q) -{ - struct array_cache **alc; - struct kmem_cache_node *n; - int r; - - n = cachep->node[q]; - if (!n) - return; - - lockdep_set_class(&n->list_lock, l3_key); - alc = n->alien; - /* - * FIXME: This check for BAD_ALIEN_MAGIC - * should go away when common slab code is taught to - * work even without alien caches. - * Currently, non NUMA code returns BAD_ALIEN_MAGIC - * for alloc_alien_cache, - */ - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) - return; - for_each_node(r) { - if (alc[r]) - lockdep_set_class(&alc[r]->lock, alc_key); - } -} - -static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) -{ - slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node); -} - -static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) -{ - int node; - - for_each_online_node(node) - slab_set_debugobj_lock_classes_node(cachep, node); -} - -static void init_node_lock_keys(int q) -{ - int i; - - if (slab_state < UP) - return; - - for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) { - struct kmem_cache_node *n; - struct kmem_cache *cache = kmalloc_caches[i]; - - if (!cache) - continue; - - n = cache->node[q]; - if (!n || OFF_SLAB(cache)) - continue; - - slab_set_lock_classes(cache, &on_slab_l3_key, - &on_slab_alc_key, q); - } -} - -static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) -{ - if (!cachep->node[q]) - return; - - slab_set_lock_classes(cachep, &on_slab_l3_key, - &on_slab_alc_key, q); -} - -static inline void on_slab_lock_classes(struct kmem_cache *cachep) -{ - int node; - - VM_BUG_ON(OFF_SLAB(cachep)); - for_each_node(node) - on_slab_lock_classes_node(cachep, node); -} - -static inline void init_lock_keys(void) -{ - int node; - - for_each_node(node) - init_node_lock_keys(node); -} -#else -static void init_node_lock_keys(int q) -{ -} +static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); -static inline void init_lock_keys(void) +static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) { + return cachep->array[smp_processor_id()]; } -static inline void on_slab_lock_classes(struct kmem_cache *cachep) +static size_t calculate_freelist_size(int nr_objs, size_t align) { -} + size_t freelist_size; -static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node) -{ -} + freelist_size = nr_objs * sizeof(freelist_idx_t); + if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) + freelist_size += nr_objs * sizeof(char); -static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) -{ -} + if (align) + freelist_size = ALIGN(freelist_size, align); -static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) -{ -} -#endif - -static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); - -static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) -{ - return cachep->array[smp_processor_id()]; + return freelist_size; } static int calculate_nr_objs(size_t slab_size, size_t buffer_size, size_t idx_size, size_t align) { int nr_objs; + size_t remained_size; size_t freelist_size; + int extra_space = 0; + if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) + extra_space = sizeof(char); /* * Ignore padding for the initial guess. The padding * is at most @align-1 bytes, and @buffer_size is at @@ -590,14 +511,15 @@ static int calculate_nr_objs(size_t slab_size, size_t buffer_size, * into the memory allocation when taking the padding * into account. */ - nr_objs = slab_size / (buffer_size + idx_size); + nr_objs = slab_size / (buffer_size + idx_size + extra_space); /* * This calculated number will be either the right * amount, or one greater than what we want. */ - freelist_size = slab_size - nr_objs * buffer_size; - if (freelist_size < ALIGN(nr_objs * idx_size, align)) + remained_size = slab_size - nr_objs * buffer_size; + freelist_size = calculate_freelist_size(nr_objs, align); + if (remained_size < freelist_size) nr_objs--; return nr_objs; @@ -635,7 +557,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, } else { nr_objs = calculate_nr_objs(slab_size, buffer_size, sizeof(freelist_idx_t), align); - mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align); + mgmt_size = calculate_freelist_size(nr_objs, align); } *num = nr_objs; *left_over = slab_size - nr_objs*buffer_size - mgmt_size; @@ -740,13 +662,8 @@ static void start_cpu_timer(int cpu) } } -static struct array_cache *alloc_arraycache(int node, int entries, - int batchcount, gfp_t gfp) +static void init_arraycache(struct array_cache *ac, int limit, int batch) { - int memsize = sizeof(void *) * entries + sizeof(struct array_cache); - struct array_cache *nc = NULL; - - nc = kmalloc_node(memsize, gfp, node); /* * The array_cache structures contain pointers to free object. * However, when such objects are allocated or transferred to another @@ -754,15 +671,24 @@ static struct array_cache *alloc_arraycache(int node, int entries, * valid references during a kmemleak scan. Therefore, kmemleak must * not scan such objects. */ - kmemleak_no_scan(nc); - if (nc) { - nc->avail = 0; - nc->limit = entries; - nc->batchcount = batchcount; - nc->touched = 0; - spin_lock_init(&nc->lock); + kmemleak_no_scan(ac); + if (ac) { + ac->avail = 0; + ac->limit = limit; + ac->batchcount = batch; + ac->touched = 0; } - return nc; +} + +static struct array_cache *alloc_arraycache(int node, int entries, + int batchcount, gfp_t gfp) +{ + size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache); + struct array_cache *ac = NULL; + + ac = kmalloc_node(memsize, gfp, node); + init_arraycache(ac, entries, batchcount); + return ac; } static inline bool is_slab_pfmemalloc(struct page *page) @@ -774,7 +700,7 @@ static inline bool is_slab_pfmemalloc(struct page *page) static void recheck_pfmemalloc_active(struct kmem_cache *cachep, struct array_cache *ac) { - struct kmem_cache_node *n = cachep->node[numa_mem_id()]; + struct kmem_cache_node *n = get_node(cachep, numa_mem_id()); struct page *page; unsigned long flags; @@ -829,7 +755,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, * If there are empty slabs on the slabs_free list and we are * being forced to refill the cache, mark this one !pfmemalloc. */ - n = cachep->node[numa_mem_id()]; + n = get_node(cachep, numa_mem_id()); if (!list_empty(&n->slabs_free) && force_refill) { struct page *page = virt_to_head_page(objp); ClearPageSlabPfmemalloc(page); @@ -909,12 +835,13 @@ static int transfer_objects(struct array_cache *to, #define drain_alien_cache(cachep, alien) do { } while (0) #define reap_alien(cachep, n) do { } while (0) -static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) +static inline struct alien_cache **alloc_alien_cache(int node, + int limit, gfp_t gfp) { - return (struct array_cache **)BAD_ALIEN_MAGIC; + return (struct alien_cache **)BAD_ALIEN_MAGIC; } -static inline void free_alien_cache(struct array_cache **ac_ptr) +static inline void free_alien_cache(struct alien_cache **ac_ptr) { } @@ -940,46 +867,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); -static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) +static struct alien_cache *__alloc_alien_cache(int node, int entries, + int batch, gfp_t gfp) +{ + size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache); + struct alien_cache *alc = NULL; + + alc = kmalloc_node(memsize, gfp, node); + init_arraycache(&alc->ac, entries, batch); + spin_lock_init(&alc->lock); + return alc; +} + +static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { - struct array_cache **ac_ptr; - int memsize = sizeof(void *) * nr_node_ids; + struct alien_cache **alc_ptr; + size_t memsize = sizeof(void *) * nr_node_ids; int i; if (limit > 1) limit = 12; - ac_ptr = kzalloc_node(memsize, gfp, node); - if (ac_ptr) { - for_each_node(i) { - if (i == node || !node_online(i)) - continue; - ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); - if (!ac_ptr[i]) { - for (i--; i >= 0; i--) - kfree(ac_ptr[i]); - kfree(ac_ptr); - return NULL; - } + alc_ptr = kzalloc_node(memsize, gfp, node); + if (!alc_ptr) + return NULL; + + for_each_node(i) { + if (i == node || !node_online(i)) + continue; + alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp); + if (!alc_ptr[i]) { + for (i--; i >= 0; i--) + kfree(alc_ptr[i]); + kfree(alc_ptr); + return NULL; } } - return ac_ptr; + return alc_ptr; } -static void free_alien_cache(struct array_cache **ac_ptr) +static void free_alien_cache(struct alien_cache **alc_ptr) { int i; - if (!ac_ptr) + if (!alc_ptr) return; for_each_node(i) - kfree(ac_ptr[i]); - kfree(ac_ptr); + kfree(alc_ptr[i]); + kfree(alc_ptr); } static void __drain_alien_cache(struct kmem_cache *cachep, - struct array_cache *ac, int node) + struct array_cache *ac, int node, + struct list_head *list) { - struct kmem_cache_node *n = cachep->node[node]; + struct kmem_cache_node *n = get_node(cachep, node); if (ac->avail) { spin_lock(&n->list_lock); @@ -991,7 +932,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, if (n->shared) transfer_objects(n->shared, ac, ac->limit); - free_block(cachep, ac->entry, ac->avail, node); + free_block(cachep, ac->entry, ac->avail, node, list); ac->avail = 0; spin_unlock(&n->list_lock); } @@ -1005,28 +946,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n) int node = __this_cpu_read(slab_reap_node); if (n->alien) { - struct array_cache *ac = n->alien[node]; + struct alien_cache *alc = n->alien[node]; + struct array_cache *ac; + + if (alc) { + ac = &alc->ac; + if (ac->avail && spin_trylock_irq(&alc->lock)) { + LIST_HEAD(list); - if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { - __drain_alien_cache(cachep, ac, node); - spin_unlock_irq(&ac->lock); + __drain_alien_cache(cachep, ac, node, &list); + spin_unlock_irq(&alc->lock); + slabs_destroy(cachep, &list); + } } } } static void drain_alien_cache(struct kmem_cache *cachep, - struct array_cache **alien) + struct alien_cache **alien) { int i = 0; + struct alien_cache *alc; struct array_cache *ac; unsigned long flags; for_each_online_node(i) { - ac = alien[i]; - if (ac) { - spin_lock_irqsave(&ac->lock, flags); - __drain_alien_cache(cachep, ac, i); - spin_unlock_irqrestore(&ac->lock, flags); + alc = alien[i]; + if (alc) { + LIST_HEAD(list); + + ac = &alc->ac; + spin_lock_irqsave(&alc->lock, flags); + __drain_alien_cache(cachep, ac, i, &list); + spin_unlock_irqrestore(&alc->lock, flags); + slabs_destroy(cachep, &list); } } } @@ -1035,8 +988,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) { int nodeid = page_to_nid(virt_to_page(objp)); struct kmem_cache_node *n; - struct array_cache *alien = NULL; + struct alien_cache *alien = NULL; + struct array_cache *ac; int node; + LIST_HEAD(list); node = numa_mem_id(); @@ -1047,21 +1002,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) if (likely(nodeid == node)) return 0; - n = cachep->node[node]; + n = get_node(cachep, node); STATS_INC_NODEFREES(cachep); if (n->alien && n->alien[nodeid]) { alien = n->alien[nodeid]; + ac = &alien->ac; spin_lock(&alien->lock); - if (unlikely(alien->avail == alien->limit)) { + if (unlikely(ac->avail == ac->limit)) { STATS_INC_ACOVERFLOW(cachep); - __drain_alien_cache(cachep, alien, nodeid); + __drain_alien_cache(cachep, ac, nodeid, &list); } - ac_put_obj(cachep, alien, objp); + ac_put_obj(cachep, ac, objp); spin_unlock(&alien->lock); + slabs_destroy(cachep, &list); } else { - spin_lock(&(cachep->node[nodeid])->list_lock); - free_block(cachep, &objp, 1, nodeid); - spin_unlock(&(cachep->node[nodeid])->list_lock); + n = get_node(cachep, nodeid); + spin_lock(&n->list_lock); + free_block(cachep, &objp, 1, nodeid, &list); + spin_unlock(&n->list_lock); + slabs_destroy(cachep, &list); } return 1; } @@ -1080,7 +1039,7 @@ static int init_cache_node_node(int node) { struct kmem_cache *cachep; struct kmem_cache_node *n; - const int memsize = sizeof(struct kmem_cache_node); + const size_t memsize = sizeof(struct kmem_cache_node); list_for_each_entry(cachep, &slab_caches, list) { /* @@ -1088,7 +1047,8 @@ static int init_cache_node_node(int node) * begin anything. Make sure some other cpu on this * node has not already allocated this */ - if (!cachep->node[node]) { + n = get_node(cachep, node); + if (!n) { n = kmalloc_node(memsize, GFP_KERNEL, node); if (!n) return -ENOMEM; @@ -1104,11 +1064,11 @@ static int init_cache_node_node(int node) cachep->node[node] = n; } - spin_lock_irq(&cachep->node[node]->list_lock); - cachep->node[node]->free_limit = + spin_lock_irq(&n->list_lock); + n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->node[node]->list_lock); + spin_unlock_irq(&n->list_lock); } return 0; } @@ -1129,12 +1089,13 @@ static void cpuup_canceled(long cpu) list_for_each_entry(cachep, &slab_caches, list) { struct array_cache *nc; struct array_cache *shared; - struct array_cache **alien; + struct alien_cache **alien; + LIST_HEAD(list); /* cpu is dead; no one can alloc from it. */ nc = cachep->array[cpu]; cachep->array[cpu] = NULL; - n = cachep->node[node]; + n = get_node(cachep, node); if (!n) goto free_array_cache; @@ -1144,7 +1105,7 @@ static void cpuup_canceled(long cpu) /* Free limit for this kmem_cache_node */ n->free_limit -= cachep->batchcount; if (nc) - free_block(cachep, nc->entry, nc->avail, node); + free_block(cachep, nc->entry, nc->avail, node, &list); if (!cpumask_empty(mask)) { spin_unlock_irq(&n->list_lock); @@ -1154,7 +1115,7 @@ static void cpuup_canceled(long cpu) shared = n->shared; if (shared) { free_block(cachep, shared->entry, - shared->avail, node); + shared->avail, node, &list); n->shared = NULL; } @@ -1169,6 +1130,7 @@ static void cpuup_canceled(long cpu) free_alien_cache(alien); } free_array_cache: + slabs_destroy(cachep, &list); kfree(nc); } /* @@ -1177,7 +1139,7 @@ free_array_cache: * shrink each nodelist to its limit. */ list_for_each_entry(cachep, &slab_caches, list) { - n = cachep->node[node]; + n = get_node(cachep, node); if (!n) continue; drain_freelist(cachep, n, slabs_tofree(cachep, n)); @@ -1208,7 +1170,7 @@ static int cpuup_prepare(long cpu) list_for_each_entry(cachep, &slab_caches, list) { struct array_cache *nc; struct array_cache *shared = NULL; - struct array_cache **alien = NULL; + struct alien_cache **alien = NULL; nc = alloc_arraycache(node, cachep->limit, cachep->batchcount, GFP_KERNEL); @@ -1232,7 +1194,7 @@ static int cpuup_prepare(long cpu) } } cachep->array[cpu] = nc; - n = cachep->node[node]; + n = get_node(cachep, node); BUG_ON(!n); spin_lock_irq(&n->list_lock); @@ -1253,13 +1215,7 @@ static int cpuup_prepare(long cpu) spin_unlock_irq(&n->list_lock); kfree(shared); free_alien_cache(alien); - if (cachep->flags & SLAB_DEBUG_OBJECTS) - slab_set_debugobj_lock_classes_node(cachep, node); - else if (!OFF_SLAB(cachep) && - !(cachep->flags & SLAB_DESTROY_BY_RCU)) - on_slab_lock_classes_node(cachep, node); } - init_node_lock_keys(node); return 0; bad: @@ -1343,7 +1299,7 @@ static int __meminit drain_cache_node_node(int node) list_for_each_entry(cachep, &slab_caches, list) { struct kmem_cache_node *n; - n = cachep->node[node]; + n = get_node(cachep, node); if (!n) continue; @@ -1523,10 +1479,6 @@ void __init kmem_cache_init(void) memcpy(ptr, cpu_cache_get(kmem_cache), sizeof(struct arraycache_init)); - /* - * Do not assume that spinlocks can be initialized via memcpy: - */ - spin_lock_init(&ptr->lock); kmem_cache->array[smp_processor_id()] = ptr; @@ -1536,10 +1488,6 @@ void __init kmem_cache_init(void) != &initarray_generic.cache); memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]), sizeof(struct arraycache_init)); - /* - * Do not assume that spinlocks can be initialized via memcpy: - */ - spin_lock_init(&ptr->lock); kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr; } @@ -1576,9 +1524,6 @@ void __init kmem_cache_init_late(void) BUG(); mutex_unlock(&slab_mutex); - /* Annotate slab for lockdep -- annotate the malloc caches */ - init_lock_keys(); - /* Done! */ slab_state = FULL; @@ -1638,14 +1583,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", cachep->name, cachep->size, cachep->gfporder); - for_each_online_node(node) { + for_each_kmem_cache_node(cachep, node, n) { unsigned long active_objs = 0, num_objs = 0, free_objects = 0; unsigned long active_slabs = 0, num_slabs = 0; - n = cachep->node[node]; - if (!n) - continue; - spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->slabs_full, lru) { active_objs += cachep->num; @@ -1672,7 +1613,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) } /* - * Interface to system's page allocator. No need to hold the cache-lock. + * Interface to system's page allocator. No need to hold the + * kmem_cache_node ->list_lock. * * If we requested dmaable memory, we will get it. Even if we * did not request dmaable memory, we might get it, but that @@ -1974,9 +1916,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, * @cachep: cache pointer being destroyed * @page: page pointer being destroyed * - * Destroy all the objs in a slab, and release the mem back to the system. - * Before calling the slab must have been unlinked from the cache. The - * cache-lock is not held/needed. + * Destroy all the objs in a slab page, and release the mem back to the system. + * Before calling the slab page must have been unlinked from the cache. The + * kmem_cache_node ->list_lock is not held/needed. */ static void slab_destroy(struct kmem_cache *cachep, struct page *page) { @@ -2008,6 +1950,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page) kmem_cache_free(cachep->freelist_cache, freelist); } +static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) +{ + struct page *page, *n; + + list_for_each_entry_safe(page, n, list, lru) { + list_del(&page->lru); + slab_destroy(cachep, page); + } +} + /** * calculate_slab_order - calculate size (page order) of slabs * @cachep: pointer to the cache that is being created @@ -2041,13 +1993,16 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, break; if (flags & CFLGS_OFF_SLAB) { + size_t freelist_size_per_obj = sizeof(freelist_idx_t); /* * Max number of objs-per-slab for caches which * use off-slab slabs. Needed to avoid a possible * looping condition in cache_grow(). */ + if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) + freelist_size_per_obj += sizeof(char); offslab_limit = size; - offslab_limit /= sizeof(freelist_idx_t); + offslab_limit /= freelist_size_per_obj; if (num > offslab_limit) break; @@ -2294,8 +2249,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (!cachep->num) return -E2BIG; - freelist_size = - ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align); + freelist_size = calculate_freelist_size(cachep->num, cachep->align); /* * If the slab has been placed off-slab, and we have enough space then @@ -2308,7 +2262,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (flags & CFLGS_OFF_SLAB) { /* really off slab. No need for manual alignment */ - freelist_size = cachep->num * sizeof(freelist_idx_t); + freelist_size = calculate_freelist_size(cachep->num, 0); #ifdef CONFIG_PAGE_POISONING /* If we're going to use the generic kernel_map_pages() @@ -2351,17 +2305,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) return err; } - if (flags & SLAB_DEBUG_OBJECTS) { - /* - * Would deadlock through slab_destroy()->call_rcu()-> - * debug_object_activate()->kmem_cache_alloc(). - */ - WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU); - - slab_set_debugobj_lock_classes(cachep); - } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU)) - on_slab_lock_classes(cachep); - return 0; } @@ -2380,7 +2323,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock); + assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); #endif } @@ -2388,7 +2331,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) { #ifdef CONFIG_SMP check_irq_off(); - assert_spin_locked(&cachep->node[node]->list_lock); + assert_spin_locked(&get_node(cachep, node)->list_lock); #endif } @@ -2408,12 +2351,16 @@ static void do_drain(void *arg) struct kmem_cache *cachep = arg; struct array_cache *ac; int node = numa_mem_id(); + struct kmem_cache_node *n; + LIST_HEAD(list); check_irq_off(); ac = cpu_cache_get(cachep); - spin_lock(&cachep->node[node]->list_lock); - free_block(cachep, ac->entry, ac->avail, node); - spin_unlock(&cachep->node[node]->list_lock); + n = get_node(cachep, node); + spin_lock(&n->list_lock); + free_block(cachep, ac->entry, ac->avail, node, &list); + spin_unlock(&n->list_lock); + slabs_destroy(cachep, &list); ac->avail = 0; } @@ -2424,17 +2371,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep) on_each_cpu(do_drain, cachep, 1); check_irq_on(); - for_each_online_node(node) { - n = cachep->node[node]; - if (n && n->alien) + for_each_kmem_cache_node(cachep, node, n) + if (n->alien) drain_alien_cache(cachep, n->alien); - } - for_each_online_node(node) { - n = cachep->node[node]; - if (n) - drain_array(cachep, n, n->shared, 1, node); - } + for_each_kmem_cache_node(cachep, node, n) + drain_array(cachep, n, n->shared, 1, node); } /* @@ -2480,17 +2422,14 @@ out: int __kmem_cache_shrink(struct kmem_cache *cachep) { - int ret = 0, i = 0; + int ret = 0; + int node; struct kmem_cache_node *n; drain_cpu_caches(cachep); check_irq_on(); - for_each_online_node(i) { - n = cachep->node[i]; - if (!n) - continue; - + for_each_kmem_cache_node(cachep, node, n) { drain_freelist(cachep, n, slabs_tofree(cachep, n)); ret += !list_empty(&n->slabs_full) || @@ -2512,13 +2451,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) kfree(cachep->array[i]); /* NUMA: free the node structures */ - for_each_online_node(i) { - n = cachep->node[i]; - if (n) { - kfree(n->shared); - free_alien_cache(n->alien); - kfree(n); - } + for_each_kmem_cache_node(cachep, i, n) { + kfree(n->shared); + free_alien_cache(n->alien); + kfree(n); + cachep->node[i] = NULL; } return 0; } @@ -2612,6 +2549,7 @@ static void cache_init_objs(struct kmem_cache *cachep, if (cachep->ctor) cachep->ctor(objp); #endif + set_obj_status(page, i, OBJECT_FREE); set_free_obj(page, i, i); } } @@ -2696,7 +2634,7 @@ static int cache_grow(struct kmem_cache *cachep, /* Take the node list lock to change the colour_next on this node */ check_irq_off(); - n = cachep->node[nodeid]; + n = get_node(cachep, nodeid); spin_lock(&n->list_lock); /* Get colour for the slab, and cal the next value. */ @@ -2820,6 +2758,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, BUG_ON(objnr >= cachep->num); BUG_ON(objp != index_to_obj(cachep, page, objnr)); + set_obj_status(page, objnr, OBJECT_FREE); if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { @@ -2864,7 +2803,7 @@ retry: */ batchcount = BATCHREFILL_LIMIT; } - n = cachep->node[node]; + n = get_node(cachep, node); BUG_ON(ac->avail > 0 || !n); spin_lock(&n->list_lock); @@ -2953,6 +2892,8 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, void *objp, unsigned long caller) { + struct page *page; + if (!objp) return objp; if (cachep->flags & SLAB_POISON) { @@ -2983,6 +2924,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, *dbg_redzone1(cachep, objp) = RED_ACTIVE; *dbg_redzone2(cachep, objp) = RED_ACTIVE; } + + page = virt_to_head_page(objp); + set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE); objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); @@ -2999,7 +2943,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) { - if (cachep == kmem_cache) + if (unlikely(cachep == kmem_cache)) return false; return should_failslab(cachep->object_size, flags, cachep->flags); @@ -3108,8 +3052,8 @@ retry: nid = zone_to_nid(zone); if (cpuset_zone_allowed_hardwall(zone, flags) && - cache->node[nid] && - cache->node[nid]->free_objects) { + get_node(cache, nid) && + get_node(cache, nid)->free_objects) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); if (obj) @@ -3172,7 +3116,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int x; VM_BUG_ON(nodeid > num_online_nodes()); - n = cachep->node[nodeid]; + n = get_node(cachep, nodeid); BUG_ON(!n); retry: @@ -3243,7 +3187,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, if (nodeid == NUMA_NO_NODE) nodeid = slab_node; - if (unlikely(!cachep->node[nodeid])) { + if (unlikely(!get_node(cachep, nodeid))) { /* Node not bootstrapped yet */ ptr = fallback_alloc(cachep, flags); goto out; @@ -3344,12 +3288,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) /* * Caller needs to acquire correct kmem_cache_node's list_lock + * @list: List of detached free slabs should be freed by caller */ -static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, - int node) +static void free_block(struct kmem_cache *cachep, void **objpp, + int nr_objects, int node, struct list_head *list) { int i; - struct kmem_cache_node *n; + struct kmem_cache_node *n = get_node(cachep, node); for (i = 0; i < nr_objects; i++) { void *objp; @@ -3359,7 +3304,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, objp = objpp[i]; page = virt_to_head_page(objp); - n = cachep->node[node]; list_del(&page->lru); check_spinlock_acquired_node(cachep, node); slab_put_obj(cachep, page, objp, node); @@ -3370,13 +3314,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, if (page->active == 0) { if (n->free_objects > n->free_limit) { n->free_objects -= cachep->num; - /* No need to drop any previously held - * lock here, even if we have a off-slab slab - * descriptor it is guaranteed to come from - * a different cache, refer to comments before - * alloc_slabmgmt. - */ - slab_destroy(cachep, page); + list_add_tail(&page->lru, list); } else { list_add(&page->lru, &n->slabs_free); } @@ -3395,13 +3333,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) int batchcount; struct kmem_cache_node *n; int node = numa_mem_id(); + LIST_HEAD(list); batchcount = ac->batchcount; #if DEBUG BUG_ON(!batchcount || batchcount > ac->avail); #endif check_irq_off(); - n = cachep->node[node]; + n = get_node(cachep, node); spin_lock(&n->list_lock); if (n->shared) { struct array_cache *shared_array = n->shared; @@ -3416,7 +3355,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) } } - free_block(cachep, ac->entry, batchcount, node); + free_block(cachep, ac->entry, batchcount, node, &list); free_done: #if STATS { @@ -3437,6 +3376,7 @@ free_done: } #endif spin_unlock(&n->list_lock); + slabs_destroy(cachep, &list); ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); } @@ -3693,7 +3633,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp) int node; struct kmem_cache_node *n; struct array_cache *new_shared; - struct array_cache **new_alien = NULL; + struct alien_cache **new_alien = NULL; for_each_online_node(node) { @@ -3714,15 +3654,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp) } } - n = cachep->node[node]; + n = get_node(cachep, node); if (n) { struct array_cache *shared = n->shared; + LIST_HEAD(list); spin_lock_irq(&n->list_lock); if (shared) free_block(cachep, shared->entry, - shared->avail, node); + shared->avail, node, &list); n->shared = new_shared; if (!n->alien) { @@ -3732,6 +3673,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp) n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; spin_unlock_irq(&n->list_lock); + slabs_destroy(cachep, &list); kfree(shared); free_alien_cache(new_alien); continue; @@ -3759,9 +3701,8 @@ fail: /* Cache is not active yet. Roll back what we did */ node--; while (node >= 0) { - if (cachep->node[node]) { - n = cachep->node[node]; - + n = get_node(cachep, node); + if (n) { kfree(n->shared); free_alien_cache(n->alien); kfree(n); @@ -3822,12 +3763,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, cachep->shared = shared; for_each_online_cpu(i) { + LIST_HEAD(list); struct array_cache *ccold = new->new[i]; + int node; + struct kmem_cache_node *n; + if (!ccold) continue; - spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); - free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); - spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock); + + node = cpu_to_mem(i); + n = get_node(cachep, node); + spin_lock_irq(&n->list_lock); + free_block(cachep, ccold->entry, ccold->avail, node, &list); + spin_unlock_irq(&n->list_lock); + slabs_destroy(cachep, &list); kfree(ccold); } kfree(new); @@ -3935,6 +3884,7 @@ skip_setup: static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, struct array_cache *ac, int force, int node) { + LIST_HEAD(list); int tofree; if (!ac || !ac->avail) @@ -3947,12 +3897,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n, tofree = force ? ac->avail : (ac->limit + 4) / 5; if (tofree > ac->avail) tofree = (ac->avail + 1) / 2; - free_block(cachep, ac->entry, tofree, node); + free_block(cachep, ac->entry, tofree, node, &list); ac->avail -= tofree; memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); } spin_unlock_irq(&n->list_lock); + slabs_destroy(cachep, &list); } } @@ -3987,7 +3938,7 @@ static void cache_reap(struct work_struct *w) * have established with reasonable certainty that * we can do some work if the lock was obtained. */ - n = searchp->node[node]; + n = get_node(searchp, node); reap_alien(searchp, n); @@ -4039,10 +3990,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) active_objs = 0; num_slabs = 0; - for_each_online_node(node) { - n = cachep->node[node]; - if (!n) - continue; + for_each_kmem_cache_node(cachep, node, n) { check_irq_on(); spin_lock_irq(&n->list_lock); @@ -4219,21 +4167,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, struct page *page) { void *p; - int i, j; + int i; if (n[0] == n[1]) return; for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { - bool active = true; - - for (j = page->active; j < c->num; j++) { - /* Skip freed item */ - if (get_free_obj(page, j) == i) { - active = false; - break; - } - } - if (!active) + if (get_obj_status(page, i) != OBJECT_ACTIVE) continue; if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) @@ -4276,10 +4215,7 @@ static int leaks_show(struct seq_file *m, void *p) x[1] = 0; - for_each_online_node(node) { - n = cachep->node[node]; - if (!n) - continue; + for_each_kmem_cache_node(cachep, node, n) { check_irq_on(); spin_lock_irq(&n->list_lock); |