From c876ad7682155958d0c9c27afe9017925c230d64 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 21 Dec 2012 20:27:12 -0800 Subject: pidns: Stop pid allocation when init dies Oleg pointed out that in a pid namespace the sequence. - pid 1 becomes a zombie - setns(thepidns), fork,... - reaping pid 1. - The injected processes exiting. Can lead to processes attempting access their child reaper and instead following a stale pointer. That waitpid for init can return before all of the processes in the pid namespace have exited is also unfortunate. Avoid these problems by disabling the allocation of new pids in a pid namespace when init dies, instead of when the last process in a pid namespace is reaped. Pointed-out-by: Oleg Nesterov Reviewed-by: Oleg Nesterov Signed-off-by: "Eric W. Biederman" --- kernel/pid.c | 15 ++++++++++++--- kernel/pid_namespace.c | 4 ++++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/pid.c b/kernel/pid.c index 36aa02ff17d..de9af600006 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -270,7 +270,6 @@ void free_pid(struct pid *pid) wake_up_process(ns->child_reaper); break; case 0: - ns->nr_hashed = -1; schedule_work(&ns->proc_work); break; } @@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) upid = pid->numbers + ns->level; spin_lock_irq(&pidmap_lock); - if (ns->nr_hashed < 0) + if (!(ns->nr_hashed & PIDNS_HASH_ADDING)) goto out_unlock; for ( ; upid >= pid->numbers; --upid) { hlist_add_head_rcu(&upid->pid_chain, @@ -342,6 +341,13 @@ out_free: goto out; } +void disable_pid_allocation(struct pid_namespace *ns) +{ + spin_lock_irq(&pidmap_lock); + ns->nr_hashed &= ~PIDNS_HASH_ADDING; + spin_unlock_irq(&pidmap_lock); +} + struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { struct hlist_node *elem; @@ -573,6 +579,9 @@ void __init pidhash_init(void) void __init pidmap_init(void) { + /* Veryify no one has done anything silly */ + BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING); + /* bump default and minimum pid_max based on number of cpus */ pid_max = min(pid_max_max, max_t(int, pid_max, PIDS_PER_CPU_DEFAULT * num_possible_cpus())); @@ -584,7 +593,7 @@ void __init pidmap_init(void) /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, init_pid_ns.pidmap[0].page); atomic_dec(&init_pid_ns.pidmap[0].nr_free); - init_pid_ns.nr_hashed = 1; + init_pid_ns.nr_hashed = PIDNS_HASH_ADDING; init_pid_ns.pid_cachep = KMEM_CACHE(pid, SLAB_HWCACHE_ALIGN | SLAB_PANIC); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index fdbd0cdf271..c1c3dc1c602 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); + ns->nr_hashed = PIDNS_HASH_ADDING; INIT_WORK(&ns->proc_work, proc_cleanup_work); set_bit(0, ns->pidmap[0].page); @@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) int rc; struct task_struct *task, *me = current; + /* Don't allow any more processes into the pid namespace */ + disable_pid_allocation(pid_ns); + /* Ignore SIGCHLD causing any terminated children to autoreap */ spin_lock_irq(&me->sighand->siglock); me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; -- cgit v1.2.3-70-g09d2