From ca279cf1065fb689abea1dc7d8c11787729bb185 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 4 Oct 2012 17:13:20 -0700 Subject: genalloc: make it possible to use a custom allocation algorithm Premit use of another algorithm than the default first-fit one. For example a custom algorithm could be used to manage alignment requirements. As I can't predict all the possible requirements/needs for all allocation uses cases, I add a "free" field 'void *data' to pass any needed information to the allocation function. For example 'data' could be used to handle a structure where you store the alignment, the expected memory bank, the requester device, or any information that could influence the allocation algorithm. An usage example may look like this: struct my_pool_constraints { int align; int bank; ... }; unsigned long my_custom_algo(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data) { struct my_pool_constraints *constraints = data; ... deal with allocation contraints ... return the index in bitmap where perform the allocation } void create_my_pool() { struct my_pool_constraints c; struct gen_pool *pool = gen_pool_create(...); gen_pool_add(pool, ...); gen_pool_set_algo(pool, my_custom_algo, &c); } Add of best-fit algorithm function: most of the time best-fit is slower then first-fit but memory fragmentation is lower. The random buffer allocation/free tests don't show any arithmetic relation between the allocation time and fragmentation but the best-fit algorithm is sometime able to perform the allocation when the first-fit can't. This new algorithm help to remove static allocations on ESRAM, a small but fast on-chip RAM of few KB, used for high-performance uses cases like DMA linked lists, graphic accelerators, encoders/decoders. On the Ux500 (in the ARM tree) we have define 5 ESRAM banks of 128 KB each and use of static allocations becomes unmaintainable: cd arch/arm/mach-ux500 && grep -r ESRAM . ./include/mach/db8500-regs.h:/* Base address and bank offsets for ESRAM */ ./include/mach/db8500-regs.h:#define U8500_ESRAM_BASE 0x40000000 ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK_SIZE 0x00020000 ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK0 U8500_ESRAM_BASE ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK1 (U8500_ESRAM_BASE + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK2 (U8500_ESRAM_BANK1 + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK3 (U8500_ESRAM_BANK2 + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_BANK4 (U8500_ESRAM_BANK3 + U8500_ESRAM_BANK_SIZE) ./include/mach/db8500-regs.h:#define U8500_ESRAM_DMA_LCPA_OFFSET 0x10000 ./include/mach/db8500-regs.h:#define U8500_DMA_LCPA_BASE (U8500_ESRAM_BANK0 + U8500_ESRAM_DMA_LCPA_OFFSET) ./include/mach/db8500-regs.h:#define U8500_DMA_LCLA_BASE U8500_ESRAM_BANK4 I want to use genalloc to do dynamic allocations but I need to be able to fine tune the allocation algorithm. I my case best-fit algorithm give better results than first-fit, but it will not be true for every use case. Signed-off-by: Benjamin Gaignard Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 4 deletions(-) (limited to 'lib/genalloc.c') diff --git a/lib/genalloc.c b/lib/genalloc.c index 6bc04aab6ec..ca208a92628 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -152,6 +152,8 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->chunks); pool->min_alloc_order = min_alloc_order; + pool->algo = gen_pool_first_fit; + pool->data = NULL; } return pool; } @@ -255,8 +257,9 @@ EXPORT_SYMBOL(gen_pool_destroy); * @size: number of bytes to allocate from the pool * * Allocate the requested number of bytes from the specified pool. - * Uses a first-fit algorithm. Can not be used in NMI handler on - * architectures without NMI-safe cmpxchg implementation. + * Uses the pool allocation function (with first-fit algorithm by default). + * Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. */ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) { @@ -280,8 +283,8 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) end_bit = (chunk->end_addr - chunk->start_addr) >> order; retry: - start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, - start_bit, nbits, 0); + start_bit = pool->algo(chunk->bits, end_bit, start_bit, nbits, + pool->data); if (start_bit >= end_bit) continue; remain = bitmap_set_ll(chunk->bits, start_bit, nbits); @@ -400,3 +403,80 @@ size_t gen_pool_size(struct gen_pool *pool) return size; } EXPORT_SYMBOL_GPL(gen_pool_size); + +/** + * gen_pool_set_algo - set the allocation algorithm + * @pool: pool to change allocation algorithm + * @algo: custom algorithm function + * @data: additional data used by @algo + * + * Call @algo for each memory allocation in the pool. + * If @algo is NULL use gen_pool_first_fit as default + * memory allocation function. + */ +void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, void *data) +{ + rcu_read_lock(); + + pool->algo = algo; + if (!pool->algo) + pool->algo = gen_pool_first_fit; + + pool->data = data; + + rcu_read_unlock(); +} +EXPORT_SYMBOL(gen_pool_set_algo); + +/** + * gen_pool_first_fit - find the first available region + * of memory matching the size requirement (no alignment constraint) + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @data: additional data - unused + */ +unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, + unsigned long start, unsigned int nr, void *data) +{ + return bitmap_find_next_zero_area(map, size, start, nr, 0); +} +EXPORT_SYMBOL(gen_pool_first_fit); + +/** + * gen_pool_best_fit - find the best fitting region of memory + * macthing the size requirement (no alignment constraint) + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @data: additional data - unused + * + * Iterate over the bitmap to find the smallest free region + * which we can allocate the memory. + */ +unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, + unsigned long start, unsigned int nr, void *data) +{ + unsigned long start_bit = size; + unsigned long len = size + 1; + unsigned long index; + + index = bitmap_find_next_zero_area(map, size, start, nr, 0); + + while (index < size) { + int next_bit = find_next_bit(map, size, index + nr); + if ((next_bit - index) < len) { + len = next_bit - index; + start_bit = index; + if (len == nr) + return start_bit; + } + index = bitmap_find_next_zero_area(map, size, + next_bit + 1, nr, 0); + } + + return start_bit; +} +EXPORT_SYMBOL(gen_pool_best_fit); -- cgit v1.2.3-70-g09d2 From eedce141cd2dad8d0cefc5468ef41898949a7031 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 25 Oct 2012 13:37:51 -0700 Subject: genalloc: stop crashing the system when destroying a pool The genalloc code uses the bitmap API from include/linux/bitmap.h and lib/bitmap.c, which is based on long values. Both bitmap_set from lib/bitmap.c and bitmap_set_ll, which is the lockless version from genalloc.c, use BITMAP_LAST_WORD_MASK to set the first bits in a long in the bitmap. That one uses (1 << bits) - 1, 0b111, if you are setting the first three bits. This means that the API counts from the least significant bits (LSB from now on) to the MSB. The LSB in the first long is bit 0, then. The same works for the lookup functions. The genalloc code uses longs for the bitmap, as it should. In include/linux/genalloc.h, struct gen_pool_chunk has unsigned long bits[0] as its last member. When allocating the struct, genalloc should reserve enough space for the bitmap. This should be a proper number of longs that can fit the amount of bits in the bitmap. However, genalloc allocates an integer number of bytes that fit the amount of bits, but may not be an integer amount of longs. 9 bytes, for example, could be allocated for 70 bits. This is a problem in itself if the Least Significat Bit in a long is in the byte with the largest address, which happens in Big Endian machines. This means genalloc is not allocating the byte in which it will try to set or check for a bit. This may end up in memory corruption, where genalloc will try to set the bits it has not allocated. In fact, genalloc may not set these bits because it may find them already set, because they were not zeroed since they were not allocated. And that's what causes a BUG when gen_pool_destroy is called and check for any set bits. What really happens is that genalloc uses kmalloc_node with __GFP_ZERO on gen_pool_add_virt. With SLAB and SLUB, this means the whole slab will be cleared, not only the requested bytes. Since struct gen_pool_chunk has a size that is a multiple of 8, and slab sizes are multiples of 8, we get lucky and allocate and clear the right amount of bytes. Hower, this is not the case with SLOB or with older code that did memset after allocating instead of using __GFP_ZERO. So, a simple module as this (running 3.6.0), will cause a crash when rmmod'ed. [root@phantom-lp2 foo]# cat foo.c #include #include #include #include MODULE_LICENSE("GPL"); MODULE_VERSION("0.1"); static struct gen_pool *foo_pool; static __init int foo_init(void) { int ret; foo_pool = gen_pool_create(10, -1); if (!foo_pool) return -ENOMEM; ret = gen_pool_add(foo_pool, 0xa0000000, 32 << 10, -1); if (ret) { gen_pool_destroy(foo_pool); return ret; } return 0; } static __exit void foo_exit(void) { gen_pool_destroy(foo_pool); } module_init(foo_init); module_exit(foo_exit); [root@phantom-lp2 foo]# zcat /proc/config.gz | grep SLOB CONFIG_SLOB=y [root@phantom-lp2 foo]# insmod ./foo.ko [root@phantom-lp2 foo]# rmmod foo ------------[ cut here ]------------ kernel BUG at lib/genalloc.c:243! cpu 0x4: Vector: 700 (Program Check) at [c0000000bb0e7960] pc: c0000000003cb50c: .gen_pool_destroy+0xac/0x110 lr: c0000000003cb4fc: .gen_pool_destroy+0x9c/0x110 sp: c0000000bb0e7be0 msr: 8000000000029032 current = 0xc0000000bb0e0000 paca = 0xc000000006d30e00 softe: 0 irq_happened: 0x01 pid = 13044, comm = rmmod kernel BUG at lib/genalloc.c:243! [c0000000bb0e7ca0] d000000004b00020 .foo_exit+0x20/0x38 [foo] [c0000000bb0e7d20] c0000000000dff98 .SyS_delete_module+0x1a8/0x290 [c0000000bb0e7e30] c0000000000097d4 syscall_exit+0x0/0x94 --- Exception: c00 (System Call) at 000000800753d1a0 SP (fffd0b0e640) is in userspace Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Paul Gortmaker Cc: Benjamin Gaignard Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/genalloc.c') diff --git a/lib/genalloc.c b/lib/genalloc.c index ca208a92628..54920433705 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -178,7 +178,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy struct gen_pool_chunk *chunk; int nbits = size >> pool->min_alloc_order; int nbytes = sizeof(struct gen_pool_chunk) + - (nbits + BITS_PER_BYTE - 1) / BITS_PER_BYTE; + BITS_TO_LONGS(nbits) * sizeof(long); chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid); if (unlikely(chunk == NULL)) -- cgit v1.2.3-70-g09d2