From 73c101011926c5832e6e141682180c4debe2cf45 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Mar 2011 13:19:51 +0100 Subject: block: initial patch for on-stack per-task plugging This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler. The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event. The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm. Signed-off-by: Jens Axboe --- kernel/fork.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 25e429152dd..027c80e5162 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; +#ifdef CONFIG_BLOCK + p->plug = NULL; +#endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT -- cgit v1.2.3-70-g09d2 From 77c100c83e84316ced2507c5799f79c2c80bc6b9 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 1 Feb 2011 09:51:46 -0500 Subject: export pid symbols needed for kvm_vcpu_on_spin Export the symbols required for a race-free kvm_vcpu_on_spin. Signed-off-by: Rik van Riel Signed-off-by: Avi Kivity --- kernel/fork.c | 1 + kernel/pid.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 25e429152dd..05b92c45701 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -193,6 +193,7 @@ void __put_task_struct(struct task_struct *tsk) if (!profile_handoff_task(tsk)) free_task(tsk); } +EXPORT_SYMBOL_GPL(__put_task_struct); /* * macro override instead of weak attribute alias, to workaround diff --git a/kernel/pid.c b/kernel/pid.c index 39b65b69584..02f22127426 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -435,6 +435,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) rcu_read_unlock(); return pid; } +EXPORT_SYMBOL_GPL(get_task_pid); struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) { @@ -446,6 +447,7 @@ struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) rcu_read_unlock(); return result; } +EXPORT_SYMBOL_GPL(get_pid_task); struct pid *find_get_pid(pid_t nr) { -- cgit v1.2.3-70-g09d2 From 504f52b5439aaf26d3e2c1d45ec10fce38c8dd27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Mar 2011 16:30:41 -0700 Subject: mm: NUMA aware alloc_task_struct_node() All kthreads being created from a single helper task, they all use memory from a single node for their kernel stack and task struct. This patch suite creates kthread_create_on_cpu(), adding a 'cpu' parameter to parameters already used by kthread_create(). This parameter serves in allocating memory for the new kthread on its memory node if available. Users of this new function are : ksoftirqd, kworker, migration, pktgend... This patch: Add a node parameter to alloc_task_struct(), and change its name to alloc_task_struct_node() This change is needed to allow NUMA aware kthread_create_on_cpu() Signed-off-by: Eric Dumazet Acked-by: David S. Miller Reviewed-by: Andi Kleen Acked-by: Rusty Russell Cc: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/include/asm/processor.h | 2 +- arch/frv/kernel/process.c | 5 +++-- arch/ia64/include/asm/thread_info.h | 9 ++++++++- arch/um/include/asm/processor-generic.h | 2 +- kernel/fork.c | 10 ++++++---- 5 files changed, 19 insertions(+), 9 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index 3744f2e47f4..4b789ab182b 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -137,7 +137,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.frame0->sp) /* Allocation and freeing of basic task resources. */ -extern struct task_struct *alloc_task_struct(void); +extern struct task_struct *alloc_task_struct_node(int node); extern void free_task_struct(struct task_struct *p); #define cpu_relax() barrier() diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index efad12071c2..9d359752646 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -44,9 +44,10 @@ asmlinkage void ret_from_fork(void); void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -struct task_struct *alloc_task_struct(void) +struct task_struct *alloc_task_struct_node(int node) { - struct task_struct *p = kmalloc(THREAD_SIZE, GFP_KERNEL); + struct task_struct *p = kmalloc_node(THREAD_SIZE, GFP_KERNEL, node); + if (p) atomic_set((atomic_t *)(p+1), 1); return p; diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index b6a5ba2aca3..342004bbefe 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -84,7 +84,14 @@ struct thread_info { #define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR -#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) +#define alloc_task_struct_node(node) \ +({ \ + struct page *page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP, \ + KERNEL_STACK_SIZE_ORDER); \ + struct task_struct *ret = page ? page_address(page) : NULL; \ + \ + ret; +}) #define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) #endif /* !__ASSEMBLY */ diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index bed668824b5..d1d1b0d8a0c 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -66,7 +66,7 @@ struct thread_struct { .request = { 0 } \ } -extern struct task_struct *alloc_task_struct(void); +extern struct task_struct *alloc_task_struct_node(int node); static inline void release_thread(struct task_struct *task) { diff --git a/kernel/fork.c b/kernel/fork.c index 05b92c45701..cffbe8a4e1f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -109,8 +109,10 @@ int nr_processes(void) } #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR -# define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) -# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) +# define alloc_task_struct_node(node) \ + kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node) +# define free_task_struct(tsk) \ + kmem_cache_free(task_struct_cachep, (tsk)) static struct kmem_cache *task_struct_cachep; #endif @@ -249,12 +251,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) struct task_struct *tsk; struct thread_info *ti; unsigned long *stackend; - + int node = numa_node_id(); int err; prepare_to_copy(orig); - tsk = alloc_task_struct(); + tsk = alloc_task_struct_node(node); if (!tsk) return NULL; -- cgit v1.2.3-70-g09d2 From b6a84016bd2598e35ead635147fa53619982648d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Mar 2011 16:30:42 -0700 Subject: mm: NUMA aware alloc_thread_info_node() Add a node parameter to alloc_thread_info(), and change its name to alloc_thread_info_node() This change is needed to allow NUMA aware kthread_create_on_cpu() Signed-off-by: Eric Dumazet Acked-by: David S. Miller Reviewed-by: Andi Kleen Acked-by: Rusty Russell Cc: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/include/asm/thread_info.h | 2 +- arch/frv/include/asm/thread_info.h | 13 ++++--------- arch/ia64/include/asm/thread_info.h | 5 +++-- arch/m32r/include/asm/thread_info.h | 13 ++++--------- arch/mips/include/asm/thread_info.h | 6 ++++-- arch/mn10300/include/asm/thread_info.h | 6 ++++-- arch/powerpc/include/asm/thread_info.h | 2 +- arch/powerpc/kernel/process.c | 4 ++-- arch/score/include/asm/thread_info.h | 2 +- arch/sh/include/asm/thread_info.h | 2 +- arch/sh/kernel/process.c | 16 +++++++++------- arch/sparc/include/asm/thread_info_32.h | 6 +++--- arch/sparc/include/asm/thread_info_64.h | 24 ++++++++++++------------ arch/sparc/mm/srmmu.c | 4 ++-- arch/sparc/mm/sun4c.c | 4 ++-- arch/tile/include/asm/thread_info.h | 2 +- arch/tile/kernel/process.c | 4 ++-- arch/x86/include/asm/thread_info.h | 10 ++++++++-- kernel/fork.c | 9 ++++++--- 19 files changed, 70 insertions(+), 64 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 91776069ca8..29b74a10583 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -68,7 +68,7 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) /* thread information allocation */ -#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) +#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) #define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* !__ASSEMBLY__ */ diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index 11f33ead29b..8582e9c7531 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -84,16 +84,11 @@ register struct thread_info *__current_thread_info asm("gr15"); /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ({ \ - struct thread_info *ret; \ - \ - ret = kzalloc(THREAD_SIZE, GFP_KERNEL); \ - \ - ret; \ - }) +#define alloc_thread_info_node(tsk, node) \ + kzalloc_node(THREAD_SIZE, GFP_KERNEL, node) #else -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#define alloc_thread_info_node(tsk) \ + kmalloc_node(THREAD_SIZE, GFP_KERNEL, node) #endif #define free_thread_info(info) kfree(info) diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 342004bbefe..6392908e8f9 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -59,11 +59,12 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_info_node(tsk, node) \ + ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info(tsk) ((struct thread_info *) 0) +#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif #define free_thread_info(ti) /* nothing */ diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index 71faff5bcc2..0227dba4406 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -96,16 +96,11 @@ static inline struct thread_info *current_thread_info(void) /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ({ \ - struct thread_info *ret; \ - \ - ret = kzalloc(THREAD_SIZE, GFP_KERNEL); \ - \ - ret; \ - }) +#define alloc_thread_info_node(tsk, node) \ + kzalloc_node(THREAD_SIZE, GFP_KERNEL, node) #else -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#define alloc_thread_info_node(tsk, node) \ + kmalloc_node(THREAD_SIZE, GFP_KERNEL, node) #endif #define free_thread_info(info) kfree(info) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index d309556cacf..d71160de4d1 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -88,9 +88,11 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL) +#define alloc_thread_info_node(tsk, node) \ + kzalloc_node(THREAD_SIZE, GFP_KERNEL, node) #else -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#define alloc_thread_info_node(tsk, node) \ + kmalloc_node(THREAD_SIZE, GFP_KERNEL, node) #endif #define free_thread_info(info) kfree(info) diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index aa07a4a5d79..8d53f09c878 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -124,9 +124,11 @@ static inline unsigned long current_stack_pointer(void) /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL) +#define alloc_thread_info_node(tsk, node) \ + kzalloc_node(THREAD_SIZE, GFP_KERNEL, node) #else -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#define alloc_thread_info_node(tsk, node) \ + kmalloc_node(THREAD_SIZE, GFP_KERNEL, node) #endif #define free_thread_info(ti) kfree((ti)) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 65eb85976a0..d8529ef13b2 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -72,7 +72,7 @@ struct thread_info { #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR -extern struct thread_info *alloc_thread_info(struct task_struct *tsk); +extern struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node); extern void free_thread_info(struct thread_info *ti); #endif /* THREAD_SHIFT < PAGE_SHIFT */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8303a6c65ef..f74f355a961 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1218,11 +1218,11 @@ void __ppc64_runlatch_off(void) static struct kmem_cache *thread_info_cache; -struct thread_info *alloc_thread_info(struct task_struct *tsk) +struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) { struct thread_info *ti; - ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL); + ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node); if (unlikely(ti == NULL)) return NULL; #ifdef CONFIG_DEBUG_STACK_USAGE diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h index 8570d08f58c..2205c62284d 100644 --- a/arch/score/include/asm/thread_info.h +++ b/arch/score/include/asm/thread_info.h @@ -71,7 +71,7 @@ struct thread_info { register struct thread_info *__current_thread_info __asm__("r28"); #define current_thread_info() __current_thread_info -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#define alloc_thread_info_node(tsk, node) kmalloc_node(THREAD_SIZE, GFP_KERNEL, node) #define free_thread_info(info) kfree(info) #endif /* !__ASSEMBLY__ */ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index c228946926e..ea2d5089de1 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -95,7 +95,7 @@ static inline struct thread_info *current_thread_info(void) #endif -extern struct thread_info *alloc_thread_info(struct task_struct *tsk); +extern struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node); extern void free_thread_info(struct thread_info *ti); extern void arch_task_cache_init(void); #define arch_task_cache_init arch_task_cache_init diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index dcb126dc76f..f39ad57296b 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -32,16 +32,16 @@ void free_thread_xstate(struct task_struct *tsk) #if THREAD_SHIFT < PAGE_SHIFT static struct kmem_cache *thread_info_cache; -struct thread_info *alloc_thread_info(struct task_struct *tsk) +struct thread_info *alloc_thread_info(struct task_struct *tsk, int node) { struct thread_info *ti; - - ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL); - if (unlikely(ti == NULL)) - return NULL; #ifdef CONFIG_DEBUG_STACK_USAGE - memset(ti, 0, THREAD_SIZE); + gfp_t mask = GFP_KERNEL | __GFP_ZERO; +#else + gfp_t mask = GFP_KERNEL; #endif + + ti = kmem_cache_alloc_node(thread_info_cache, mask, node); return ti; } @@ -64,7 +64,9 @@ struct thread_info *alloc_thread_info(struct task_struct *tsk) #else gfp_t mask = GFP_KERNEL; #endif - return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); + struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER); + + return page ? page_address(page) : NULL; } void free_thread_info(struct thread_info *ti) diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 9dd0318d3dd..fa575323341 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -82,8 +82,8 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR -BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void) -#define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)() +BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info_node, int) +#define alloc_thread_info_node(tsk, node) BTFIXUP_CALL(alloc_thread_info_node)(node) BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *) #define free_thread_info(ti) BTFIXUP_CALL(free_thread_info)(ti) @@ -92,7 +92,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *) /* * Size of kernel stack for each process. - * Observe the order of get_free_pages() in alloc_thread_info(). + * Observe the order of get_free_pages() in alloc_thread_info_node(). * The sun4 has 8K stack too, because it's short on memory, and 16K is a waste. */ #define THREAD_SIZE 8192 diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index fb2ea7705a4..60d86be1a53 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -146,21 +146,21 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ -({ \ - struct thread_info *ret; \ - \ - ret = (struct thread_info *) \ - __get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER); \ - if (ret) \ - memset(ret, 0, PAGE_SIZE<<__THREAD_INFO_ORDER); \ - ret; \ -}) +#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) #else -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(GFP_KERNEL, __THREAD_INFO_ORDER)) +#define THREAD_FLAGS (GFP_KERNEL) #endif +#define alloc_thread_info_node(tsk, node) \ +({ \ + struct page *page = alloc_pages_node(node, THREAD_FLAGS, \ + __THREAD_INFO_ORDER); \ + struct thread_info *ret; \ + \ + ret = page ? page_address(page) : NULL; \ + ret; \ +}) + #define free_thread_info(ti) \ free_pages((unsigned long)(ti),__THREAD_INFO_ORDER) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 92319aa8b66..fe09fd8be69 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -650,7 +650,7 @@ static void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len) * mappings on the kernel stack without any special code as we did * need on the sun4c. */ -static struct thread_info *srmmu_alloc_thread_info(void) +static struct thread_info *srmmu_alloc_thread_info_node(int node) { struct thread_info *ret; @@ -2271,7 +2271,7 @@ void __init ld_mmu_srmmu(void) BTFIXUPSET_CALL(mmu_info, srmmu_mmu_info, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(alloc_thread_info, srmmu_alloc_thread_info, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(alloc_thread_info_node, srmmu_alloc_thread_info_node, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(free_thread_info, srmmu_free_thread_info, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(pte_to_pgoff, srmmu_pte_to_pgoff, BTFIXUPCALL_NORM); diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index b5137cc2aba..a2350b5e68a 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -922,7 +922,7 @@ static inline void garbage_collect(int entry) free_locked_segment(BUCKET_ADDR(entry)); } -static struct thread_info *sun4c_alloc_thread_info(void) +static struct thread_info *sun4c_alloc_thread_info_node(int node) { unsigned long addr, pages; int entry; @@ -2155,7 +2155,7 @@ void __init ld_mmu_sun4c(void) BTFIXUPSET_CALL(__swp_offset, sun4c_swp_offset, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(__swp_entry, sun4c_swp_entry, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(alloc_thread_info, sun4c_alloc_thread_info, BTFIXUPCALL_NORM); + BTFIXUPSET_CALL(alloc_thread_info_node, sun4c_alloc_thread_info_node, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(free_thread_info, sun4c_free_thread_info, BTFIXUPCALL_NORM); BTFIXUPSET_CALL(mmu_info, sun4c_mmu_info, BTFIXUPCALL_NORM); diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 9e8e9c4dfa2..3405b52853b 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -84,7 +84,7 @@ register unsigned long stack_pointer __asm__("sp"); ((struct thread_info *)(stack_pointer & -THREAD_SIZE)) #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR -extern struct thread_info *alloc_thread_info(struct task_struct *task); +extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node); extern void free_thread_info(struct thread_info *info); /* Sit on a nap instruction until interrupted. */ diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index b9cd962e1d3..d0065103eb7 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -109,7 +109,7 @@ void cpu_idle(void) } } -struct thread_info *alloc_thread_info(struct task_struct *task) +struct thread_info *alloc_thread_info_node(struct task_struct *task, int node) { struct page *page; gfp_t flags = GFP_KERNEL; @@ -118,7 +118,7 @@ struct thread_info *alloc_thread_info(struct task_struct *task) flags |= __GFP_ZERO; #endif - page = alloc_pages(flags, THREAD_SIZE_ORDER); + page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER); if (!page) return NULL; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f0b6e5dbc5a..1f2e61e2898 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -161,8 +161,14 @@ struct thread_info { #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR -#define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) +#define alloc_thread_info_node(tsk, node) \ +({ \ + struct page *page = alloc_pages_node(node, THREAD_FLAGS, \ + THREAD_ORDER); \ + struct thread_info *ret = page ? page_address(page) : NULL; \ + \ + ret; \ +}) #ifdef CONFIG_X86_32 diff --git a/kernel/fork.c b/kernel/fork.c index cffbe8a4e1f..cbc6adc6e89 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -117,14 +117,17 @@ static struct kmem_cache *task_struct_cachep; #endif #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR -static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) +static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, + int node) { #ifdef CONFIG_DEBUG_STACK_USAGE gfp_t mask = GFP_KERNEL | __GFP_ZERO; #else gfp_t mask = GFP_KERNEL; #endif - return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); + struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER); + + return page ? page_address(page) : NULL; } static inline void free_thread_info(struct thread_info *ti) @@ -260,7 +263,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) if (!tsk) return NULL; - ti = alloc_thread_info(tsk); + ti = alloc_thread_info_node(tsk, node); if (!ti) { free_task_struct(tsk); return NULL; -- cgit v1.2.3-70-g09d2 From 207205a2ba2655652fe46a60b49838af6c16a919 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Mar 2011 16:30:44 -0700 Subject: kthread: NUMA aware kthread_create_on_node() All kthreads being created from a single helper task, they all use memory from a single node for their kernel stack and task struct. This patch suite creates kthread_create_on_node(), adding a 'cpu' parameter to parameters already used by kthread_create(). This parameter serves in allocating memory for the new kthread on its memory node if possible. Signed-off-by: Eric Dumazet Acked-by: David S. Miller Reviewed-by: Andi Kleen Acked-by: Rusty Russell Cc: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kthread.h | 14 ++++++++++---- include/linux/sched.h | 1 + kernel/fork.c | 3 ++- kernel/kthread.c | 31 +++++++++++++++++++++++++------ 4 files changed, 38 insertions(+), 11 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 7ff16f7d3ed..1e923e5e88e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -4,10 +4,15 @@ #include #include -struct task_struct *kthread_create(int (*threadfn)(void *data), - void *data, - const char namefmt[], ...) - __attribute__((format(printf, 3, 4))); +struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), + void *data, + int node, + const char namefmt[], ...) + __attribute__((format(printf, 4, 5))); + +#define kthread_create(threadfn, data, namefmt, arg...) \ + kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) + /** * kthread_run - create and wake a thread. @@ -34,6 +39,7 @@ void *kthread_data(struct task_struct *k); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; +extern int tsk_fork_get_node(struct task_struct *tsk); /* * Simple work processor based on kthread. diff --git a/include/linux/sched.h b/include/linux/sched.h index c15936fe998..4b601be3dac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1471,6 +1471,7 @@ struct task_struct { #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* Protected by alloc_lock */ short il_next; + short pref_node_fork; #endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; diff --git a/kernel/fork.c b/kernel/fork.c index cbc6adc6e89..a8f64f8ec7e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -254,7 +255,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) struct task_struct *tsk; struct thread_info *ti; unsigned long *stackend; - int node = numa_node_id(); + int node = tsk_fork_get_node(orig); int err; prepare_to_copy(orig); diff --git a/kernel/kthread.c b/kernel/kthread.c index c55afba990a..684ab3f7dd7 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -27,6 +27,7 @@ struct kthread_create_info /* Information passed to kthread() from kthreadd. */ int (*threadfn)(void *data); void *data; + int node; /* Result passed back to kthread_create() from kthreadd. */ struct task_struct *result; @@ -98,10 +99,23 @@ static int kthread(void *_create) do_exit(ret); } +/* called from do_fork() to get node information for about to be created task */ +int tsk_fork_get_node(struct task_struct *tsk) +{ +#ifdef CONFIG_NUMA + if (tsk == kthreadd_task) + return tsk->pref_node_fork; +#endif + return numa_node_id(); +} + static void create_kthread(struct kthread_create_info *create) { int pid; +#ifdef CONFIG_NUMA + current->pref_node_fork = create->node; +#endif /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); if (pid < 0) { @@ -111,15 +125,18 @@ static void create_kthread(struct kthread_create_info *create) } /** - * kthread_create - create a kthread. + * kthread_create_on_node - create a kthread. * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. + * @node: memory node number. * @namefmt: printf-style name for the thread. * * Description: This helper function creates and names a kernel * thread. The thread will be stopped: use wake_up_process() to start * it. See also kthread_run(). * + * If thread is going to be bound on a particular cpu, give its node + * in @node, to get NUMA affinity for kthread stack, or else give -1. * When woken, the thread will run @threadfn() with @data as its * argument. @threadfn() can either call do_exit() directly if it is a * standalone thread for which noone will call kthread_stop(), or @@ -129,15 +146,17 @@ static void create_kthread(struct kthread_create_info *create) * * Returns a task_struct or ERR_PTR(-ENOMEM). */ -struct task_struct *kthread_create(int (*threadfn)(void *data), - void *data, - const char namefmt[], - ...) +struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), + void *data, + int node, + const char namefmt[], + ...) { struct kthread_create_info create; create.threadfn = threadfn; create.data = data; + create.node = node; init_completion(&create.done); spin_lock(&kthread_create_lock); @@ -164,7 +183,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), } return create.result; } -EXPORT_SYMBOL(kthread_create); +EXPORT_SYMBOL(kthread_create_on_node); /** * kthread_bind - bind a just-created kthread to a cpu. -- cgit v1.2.3-70-g09d2 From 9bfb23fc4a481650e60d22dbe84c0fd5a9d49bba Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 22 Mar 2011 16:34:09 -0700 Subject: sys_unshare: remove the dead CLONE_THREAD/SIGHAND/VM code Cleanup: kill the dead code which does nothing but complicates the code and confuses the reader. sys_unshare(CLONE_THREAD/SIGHAND/VM) is not really implemented, and I doubt very much it will ever work. At least, nobody even tried since the original 99d1419d96d7df9cfa56 ("unshare system call -v5: system call handler function") was applied more than 4 years ago. And the code is not consistent. unshare_thread() always fails unconditionally, while unshare_sighand() and unshare_vm() pretend to work if there is nothing to unshare. Remove unshare_thread(), unshare_sighand(), unshare_vm() helpers and related variables and add a simple CLONE_THREAD | CLONE_SIGHAND| CLONE_VM check into check_unshare_flags(). Also, move the "CLONE_NEWNS needs CLONE_FS" check from check_unshare_flags() to sys_unshare(). This looks more consistent and matches the similar do_sysvsem check in sys_unshare(). Note: with or without this patch "atomic_read(mm->mm_users) > 1" can give a false positive due to get_task_mm(). Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Janak Desai Cc: Daniel Lezcano Cc: "Eric W. Biederman" Cc: KOSAKI Motohiro Cc: Alexey Dobriyan Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 123 ++++++++++++---------------------------------------------- 1 file changed, 25 insertions(+), 98 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index a8f64f8ec7e..f2b494d7c55 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1519,38 +1519,24 @@ void __init proc_caches_init(void) } /* - * Check constraints on flags passed to the unshare system call and - * force unsharing of additional process context as appropriate. + * Check constraints on flags passed to the unshare system call. */ -static void check_unshare_flags(unsigned long *flags_ptr) +static int check_unshare_flags(unsigned long unshare_flags) { + if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| + CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) + return -EINVAL; /* - * If unsharing a thread from a thread group, must also - * unshare vm. - */ - if (*flags_ptr & CLONE_THREAD) - *flags_ptr |= CLONE_VM; - - /* - * If unsharing vm, must also unshare signal handlers. - */ - if (*flags_ptr & CLONE_VM) - *flags_ptr |= CLONE_SIGHAND; - - /* - * If unsharing namespace, must also unshare filesystem information. + * Not implemented, but pretend it works if there is nothing to + * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND + * needs to unshare vm. */ - if (*flags_ptr & CLONE_NEWNS) - *flags_ptr |= CLONE_FS; -} - -/* - * Unsharing of tasks created with CLONE_THREAD is not supported yet - */ -static int unshare_thread(unsigned long unshare_flags) -{ - if (unshare_flags & CLONE_THREAD) - return -EINVAL; + if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { + /* FIXME: get_task_mm() increments ->mm_users */ + if (atomic_read(¤t->mm->mm_users) > 1) + return -EINVAL; + } return 0; } @@ -1576,34 +1562,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) return 0; } -/* - * Unsharing of sighand is not supported yet - */ -static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) -{ - struct sighand_struct *sigh = current->sighand; - - if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1) - return -EINVAL; - else - return 0; -} - -/* - * Unshare vm if it is being shared - */ -static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) -{ - struct mm_struct *mm = current->mm; - - if ((unshare_flags & CLONE_VM) && - (mm && atomic_read(&mm->mm_users) > 1)) { - return -EINVAL; - } - - return 0; -} - /* * Unshare file descriptor table if it is being shared */ @@ -1632,23 +1590,21 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp */ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { - int err = 0; struct fs_struct *fs, *new_fs = NULL; - struct sighand_struct *new_sigh = NULL; - struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; + int err; - check_unshare_flags(&unshare_flags); - - /* Return -EINVAL for all unsupported flags */ - err = -EINVAL; - if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| - CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) + err = check_unshare_flags(unshare_flags); + if (err) goto bad_unshare_out; + /* + * If unsharing namespace, must also unshare filesystem information. + */ + if (unshare_flags & CLONE_NEWNS) + unshare_flags |= CLONE_FS; /* * CLONE_NEWIPC must also detach from the undolist: after switching * to a new ipc namespace, the semaphore arrays from the old @@ -1656,21 +1612,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; - if ((err = unshare_thread(unshare_flags))) - goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) - goto bad_unshare_cleanup_thread; - if ((err = unshare_sighand(unshare_flags, &new_sigh))) - goto bad_unshare_cleanup_fs; - if ((err = unshare_vm(unshare_flags, &new_mm))) - goto bad_unshare_cleanup_sigh; + goto bad_unshare_out; if ((err = unshare_fd(unshare_flags, &new_fd))) - goto bad_unshare_cleanup_vm; + goto bad_unshare_cleanup_fs; if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs))) goto bad_unshare_cleanup_fd; - if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { + if (new_fs || new_fd || do_sysvsem || new_nsproxy) { if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). @@ -1696,19 +1646,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) spin_unlock(&fs->lock); } - if (new_mm) { - mm = current->mm; - active_mm = current->active_mm; - current->mm = new_mm; - current->active_mm = new_mm; - if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { - atomic_dec(&mm->oom_disable_count); - atomic_inc(&new_mm->oom_disable_count); - } - activate_mm(active_mm, new_mm); - new_mm = mm; - } - if (new_fd) { fd = current->files; current->files = new_fd; @@ -1725,20 +1662,10 @@ bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); -bad_unshare_cleanup_vm: - if (new_mm) - mmput(new_mm); - -bad_unshare_cleanup_sigh: - if (new_sigh) - if (atomic_dec_and_test(&new_sigh->count)) - kmem_cache_free(sighand_cachep, new_sigh); - bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); -bad_unshare_cleanup_thread: bad_unshare_out: return err; } -- cgit v1.2.3-70-g09d2 From 45a68628d37222e655219febce9e91b6484789b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Mar 2011 16:43:12 -0700 Subject: pid: remove the child_reaper special case in init/main.c This patchset is a cleanup and a preparation to unshare the pid namespace. These prerequisites prepare for Eric's patchset to give a file descriptor to a namespace and join an existing namespace. This patch: It turns out that the existing assignment in copy_process of the child_reaper can handle the initial assignment of child_reaper we just need to generalize the test in kernel/fork.c Signed-off-by: Eric W. Biederman Signed-off-by: Daniel Lezcano Cc: Oleg Nesterov Cc: Alexey Dobriyan Acked-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 11 +++++++++++ init/main.c | 9 --------- kernel/fork.c | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/pid.h b/include/linux/pid.h index 49f1c2f66e9..efceda0a51b 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -140,6 +140,17 @@ static inline struct pid_namespace *ns_of_pid(struct pid *pid) return ns; } +/* + * is_child_reaper returns true if the pid is the init process + * of the current namespace. As this one could be checked before + * pid_ns->child_reaper is assigned in copy_process, we check + * with the pid number. + */ +static inline bool is_child_reaper(struct pid *pid) +{ + return pid->numbers[pid->level].nr == 1; +} + /* * the helpers to get the pid's id seen from different namespaces * diff --git a/init/main.c b/init/main.c index 3627bb37225..4a9479ef454 100644 --- a/init/main.c +++ b/init/main.c @@ -787,15 +787,6 @@ static int __init kernel_init(void * unused) * init can run on any cpu. */ set_cpus_allowed_ptr(current, cpu_all_mask); - /* - * Tell the world that we're going to be the grim - * reaper of innocent orphaned children. - * - * We don't want people to have to make incorrect - * assumptions about where in the task array this - * can be found. - */ - init_pid_ns.child_reaper = current; cad_pid = task_pid(current); diff --git a/kernel/fork.c b/kernel/fork.c index f2b494d7c55..17aed4378ed 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1296,7 +1296,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { - if (clone_flags & CLONE_NEWPID) + if (is_child_reaper(pid)) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; -- cgit v1.2.3-70-g09d2 From 4308eebbeb2026827d4492ce8c23d99f7f144a82 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Mar 2011 16:43:13 -0700 Subject: pidns: call pid_ns_prepare_proc() from create_pid_namespace() Reorganize proc_get_sb() so it can be called before the struct pid of the first process is allocated. Signed-off-by: Eric W. Biederman Signed-off-by: Daniel Lezcano Cc: Oleg Nesterov Cc: Alexey Dobriyan Acked-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/root.c | 25 +++++++------------------ kernel/fork.c | 6 ------ kernel/pid_namespace.c | 11 +++++++++-- 3 files changed, 16 insertions(+), 26 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/proc/root.c b/fs/proc/root.c index ef9fa8e24ad..e5e2bfa7a03 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -43,17 +43,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, struct pid_namespace *ns; struct proc_inode *ei; - if (proc_mnt) { - /* Seed the root directory with a pid so it doesn't need - * to be special in base.c. I would do this earlier but - * the only task alive when /proc is mounted the first time - * is the init_task and it doesn't have any pids. - */ - ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); - if (!ei->pid) - ei->pid = find_get_pid(1); - } - if (flags & MS_KERNMOUNT) ns = (struct pid_namespace *)data; else @@ -71,16 +60,16 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, return ERR_PTR(err); } - ei = PROC_I(sb->s_root->d_inode); - if (!ei->pid) { - rcu_read_lock(); - ei->pid = get_pid(find_pid_ns(1, ns)); - rcu_read_unlock(); - } - sb->s_flags |= MS_ACTIVE; } + ei = PROC_I(sb->s_root->d_inode); + if (!ei->pid) { + rcu_read_lock(); + ei->pid = get_pid(find_pid_ns(1, ns)); + rcu_read_unlock(); + } + return dget(sb->s_root); } diff --git a/kernel/fork.c b/kernel/fork.c index 17aed4378ed..457fff2e17e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1187,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; - - if (clone_flags & CLONE_NEWPID) { - retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); - if (retval < 0) - goto bad_fork_free_pid; - } } p->pid = pid_nr(pid); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a5aff94e1f0..e9c9adc84ca 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -14,6 +14,7 @@ #include #include #include +#include #define BITS_PER_PAGE (PAGE_SIZE*8) @@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p { struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; - int i; + int i, err = -ENOMEM; ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); if (ns == NULL) @@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p for (i = 1; i < PIDMAP_ENTRIES; i++) atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); + err = pid_ns_prepare_proc(ns); + if (err) + goto out_put_parent_pid_ns; + return ns; +out_put_parent_pid_ns: + put_pid_ns(parent_pid_ns); out_free_map: kfree(ns->pidmap[0].page); out_free: kmem_cache_free(pid_ns_cachep, ns); out: - return ERR_PTR(-ENOMEM); + return ERR_PTR(err); } static void destroy_pid_namespace(struct pid_namespace *ns) -- cgit v1.2.3-70-g09d2