diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 9 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/futex_compat.c | 4 | ||||
-rw-r--r-- | kernel/kmod.c | 2 | ||||
-rw-r--r-- | kernel/ptrace.c | 1 | ||||
-rw-r--r-- | kernel/sched.c | 25 | ||||
-rw-r--r-- | kernel/sched_debug.c | 1 | ||||
-rw-r--r-- | kernel/sched_fair.c | 122 | ||||
-rw-r--r-- | kernel/signal.c | 8 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 | ||||
-rw-r--r-- | kernel/time/ntp.c | 2 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 17 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 12 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 10 | ||||
-rw-r--r-- | kernel/user.c | 45 | ||||
-rw-r--r-- | kernel/user_namespace.c | 4 | ||||
-rw-r--r-- | kernel/utsname.c | 2 |
17 files changed, 181 insertions, 93 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 06b24b3aa37..993369ee94d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -24,7 +24,6 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/profile.h> -#include <linux/signalfd.h> #include <linux/mount.h> #include <linux/proc_fs.h> #include <linux/kthread.h> @@ -86,14 +85,6 @@ static void __exit_signal(struct task_struct *tsk) sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); - /* - * Notify that this sighand has been detached. This must - * be called with the tsk->sighand lock held. Also, this - * access tsk->sighand internally, so it must be called - * before tsk->sighand is reset. - */ - signalfd_detach_locked(tsk); - posix_cpu_timers_exit(tsk); if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 7332e236d36..33f12f48684 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1438,7 +1438,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep, struct sighand_struct *sighand = data; spin_lock_init(&sighand->siglock); - INIT_LIST_HEAD(&sighand->signalfd_list); + init_waitqueue_head(&sighand->signalfd_wqh); } void __init proc_caches_init(void) diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index f7921360efa..7e52eb051f2 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -61,10 +61,10 @@ void compat_exit_robust_list(struct task_struct *curr) if (fetch_robust_entry(&upending, &pending, &head->list_op_pending, &pip)) return; - if (upending) + if (pending) handle_futex_death((void __user *)pending + futex_offset, curr, pip); - while (compat_ptr(uentry) != &head->list) { + while (entry != (struct robust_list __user *) &head->list) { /* * A pending lock might already be on the list, so * dont process it twice: diff --git a/kernel/kmod.c b/kernel/kmod.c index 9809cc1f33d..c6a4f8aebeb 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -505,7 +505,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, if (ret < 0) goto out; - return call_usermodehelper_exec(sub_info, 1); + return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); out: call_usermodehelper_freeinfo(sub_info); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 82a558b655d..3eca7a55f2e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -233,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) /* Architecture-specific hardware disable .. */ ptrace_disable(child); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); write_lock_irq(&tasklist_lock); /* protect against de_thread()->release_task() */ diff --git a/kernel/sched.c b/kernel/sched.c index b533d6db78a..6107a0cd632 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -668,7 +668,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor) /* * Shift right and round: */ -#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) +#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, @@ -684,10 +684,10 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, * Check whether we'd overflow the 64-bit multiplication: */ if (unlikely(tmp > WMULT_CONST)) - tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight, + tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight, WMULT_SHIFT/2); else - tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT); + tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT); return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); } @@ -858,7 +858,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq) static void set_load_weight(struct task_struct *p) { - task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime; p->se.wait_runtime = 0; if (task_has_rt_policy(p)) { @@ -1683,6 +1682,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->prio = effective_prio(p); + if (rt_prio(p->prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; + if (!p->sched_class->task_new || !sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu || !current->se.on_rq) { @@ -2512,7 +2516,7 @@ group_next: * a think about bumping its value to force at least one task to be * moved */ - if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) { + if (*imbalance < busiest_load_per_task) { unsigned long tmp, pwr_now, pwr_move; unsigned int imbn; @@ -2564,10 +2568,8 @@ small_imbalance: pwr_move /= SCHED_LOAD_SCALE; /* Move if we gain throughput */ - if (pwr_move <= pwr_now) - goto out_balanced; - - *imbalance = busiest_load_per_task; + if (pwr_move > pwr_now) + *imbalance = busiest_load_per_task; } return busiest; @@ -4553,10 +4555,7 @@ asmlinkage long sys_sched_yield(void) struct rq *rq = this_rq_lock(); schedstat_inc(rq, yld_cnt); - if (unlikely(rq->nr_running == 1)) - schedstat_inc(rq, yld_act_empty); - else - current->sched_class->yield_task(rq, current); + current->sched_class->yield_task(rq, current); /* * Since we are going to call schedule() anyway, there's diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index ab18f45f2ab..c3ee38bd342 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -283,4 +283,5 @@ void proc_sched_set_task(struct task_struct *p) p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0; #endif p->se.sum_exec_runtime = 0; + p->se.prev_sum_exec_runtime = 0; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ce39282d9c0..c9fbe8e73a4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -43,6 +43,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL; unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL; /* + * sys_sched_yield() compat mode + * + * This option switches the agressive yield implementation of the + * old scheduler back on. + */ +unsigned int __read_mostly sysctl_sched_compat_yield; + +/* * SCHED_BATCH wake-up granularity. * (default: 25 msec, units: nanoseconds) * @@ -194,6 +202,8 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) update_load_add(&cfs_rq->load, se->load.weight); cfs_rq->nr_running++; se->on_rq = 1; + + schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); } static inline void @@ -205,6 +215,8 @@ __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) update_load_sub(&cfs_rq->load, se->load.weight); cfs_rq->nr_running--; se->on_rq = 0; + + schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime); } static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) @@ -291,7 +303,7 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity) /* * It will always fit into 'long': */ - return (long) (tmp >> WMULT_SHIFT); + return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT)); } static inline void @@ -574,7 +586,6 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) prev_runtime = se->wait_runtime; __add_wait_runtime(cfs_rq, se, delta_fair); - schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); delta_fair = se->wait_runtime - prev_runtime; /* @@ -662,7 +673,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) if (tsk->state & TASK_UNINTERRUPTIBLE) se->block_start = rq_of(cfs_rq)->clock; } - cfs_rq->wait_runtime -= se->wait_runtime; #endif } __dequeue_entity(cfs_rq, se); @@ -671,22 +681,39 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) /* * Preempt the current task with a newly woken task if needed: */ -static int +static void __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, struct sched_entity *curr, unsigned long granularity) { s64 __delta = curr->fair_key - se->fair_key; + unsigned long ideal_runtime, delta_exec; + + /* + * ideal_runtime is compared against sum_exec_runtime, which is + * walltime, hence do not scale. + */ + ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running, + (unsigned long)sysctl_sched_min_granularity); + + /* + * If we executed more than what the latency constraint suggests, + * reduce the rescheduling granularity. This way the total latency + * of how much a task is not scheduled converges to + * sysctl_sched_latency: + */ + delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; + if (delta_exec > ideal_runtime) + granularity = 0; /* * Take scheduling granularity into account - do not * preempt the current task unless the best task has * a larger than sched_granularity fairness advantage: + * + * scale granularity as key space is in fair_clock. */ - if (__delta > niced_granularity(curr, granularity)) { + if (__delta > niced_granularity(curr, granularity)) resched_task(rq_of(cfs_rq)->curr); - return 1; - } - return 0; } static inline void @@ -702,6 +729,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) update_stats_wait_end(cfs_rq, se); update_stats_curr_start(cfs_rq, se); set_cfs_rq_curr(cfs_rq, se); + se->prev_sum_exec_runtime = se->sum_exec_runtime; } static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) @@ -731,7 +759,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) { - unsigned long gran, ideal_runtime, delta_exec; struct sched_entity *next; /* @@ -748,22 +775,8 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) if (next == curr) return; - gran = sched_granularity(cfs_rq); - ideal_runtime = niced_granularity(curr, - max(sysctl_sched_latency / cfs_rq->nr_running, - (unsigned long)sysctl_sched_min_granularity)); - /* - * If we executed more than what the latency constraint suggests, - * reduce the rescheduling granularity. This way the total latency - * of how much a task is not scheduled converges to - * sysctl_sched_latency: - */ - delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; - if (delta_exec > ideal_runtime) - gran = 0; - - if (__check_preempt_curr_fair(cfs_rq, next, curr, gran)) - curr->prev_sum_exec_runtime = curr->sum_exec_runtime; + __check_preempt_curr_fair(cfs_rq, next, curr, + sched_granularity(cfs_rq)); } /************************************************** @@ -892,19 +905,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) } /* - * sched_yield() support is very simple - we dequeue and enqueue + * sched_yield() support is very simple - we dequeue and enqueue. + * + * If compat_yield is turned on then we requeue to the end of the tree. */ static void yield_task_fair(struct rq *rq, struct task_struct *p) { struct cfs_rq *cfs_rq = task_cfs_rq(p); + struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; + struct sched_entity *rightmost, *se = &p->se; + struct rb_node *parent; - __update_rq_clock(rq); /* - * Dequeue and enqueue the task to update its - * position within the tree: + * Are we the only task in the tree? + */ + if (unlikely(cfs_rq->nr_running == 1)) + return; + + if (likely(!sysctl_sched_compat_yield)) { + __update_rq_clock(rq); + /* + * Dequeue and enqueue the task to update its + * position within the tree: + */ + dequeue_entity(cfs_rq, &p->se, 0); + enqueue_entity(cfs_rq, &p->se, 0); + + return; + } + /* + * Find the rightmost entry in the rbtree: + */ + do { + parent = *link; + link = &parent->rb_right; + } while (*link); + + rightmost = rb_entry(parent, struct sched_entity, run_node); + /* + * Already in the rightmost position? + */ + if (unlikely(rightmost == se)) + return; + + /* + * Minimally necessary key value to be last in the tree: + */ + se->fair_key = rightmost->fair_key + 1; + + if (cfs_rq->rb_leftmost == &se->run_node) + cfs_rq->rb_leftmost = rb_next(&se->run_node); + /* + * Relink the task to the rightmost position: */ - dequeue_entity(cfs_rq, &p->se, 0); - enqueue_entity(cfs_rq, &p->se, 0); + rb_erase(&se->run_node, &cfs_rq->tasks_timeline); + rb_link_node(&se->run_node, parent, link); + rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); } /* @@ -1121,10 +1177,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) * The statistical average of wait_runtime is about * -granularity/2, so initialize the task with that: */ - if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) { + if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) se->wait_runtime = -(sched_granularity(cfs_rq) / 2); - schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); - } __enqueue_entity(cfs_rq, se); } diff --git a/kernel/signal.c b/kernel/signal.c index 3169bed0b4d..9fb91a32edd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -378,8 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ - if (likely(tsk == current)) - signr = __dequeue_signal(&tsk->pending, mask, info); + signr = __dequeue_signal(&tsk->pending, mask, info); if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info); @@ -407,8 +406,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) } } } - if (likely(tsk == current)) - recalc_sigpending(); + recalc_sigpending(); if (signr && unlikely(sig_kernel_stop(signr))) { /* * Set a marker that we have dequeued a stop signal. Our @@ -425,7 +423,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; } - if (signr && likely(tsk == current) && + if (signr && ((info->si_code & __SI_MASK) == __SI_TIMER) && info->si_sys_private){ /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6ace893c17c..53a456ebf6d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -303,6 +303,14 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_compat_yield", + .data = &sysctl_sched_compat_yield, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #ifdef CONFIG_PROVE_LOCKING { .ctl_name = CTL_UNNUMBERED, diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index cd91237dbfe..de6a2d6b3eb 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -226,7 +226,7 @@ static void sync_cmos_clock(unsigned long dummy) static void notify_cmos_timer(void) { - if (no_sync_cmos_clock) + if (!no_sync_cmos_clock) mod_timer(&sync_cmos_timer, jiffies + 1); } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index db8e0f3d409..0962e057766 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -383,11 +383,7 @@ static int tick_broadcast_set_event(ktime_t expires, int force) int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); - - if(!cpus_empty(tick_broadcast_oneshot_mask)) - tick_broadcast_set_event(ktime_get(), 1); - - return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask); + return 0; } /* @@ -549,20 +545,17 @@ void tick_broadcast_switch_to_oneshot(void) */ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { - struct clock_event_device *bc; unsigned long flags; unsigned int cpu = *cpup; spin_lock_irqsave(&tick_broadcast_lock, flags); - bc = tick_broadcast_device.evtdev; + /* + * Clear the broadcast mask flag for the dead cpu, but do not + * stop the broadcast device! + */ cpu_clear(cpu, tick_broadcast_oneshot_mask); - if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) { - if (bc && cpus_empty(tick_broadcast_oneshot_mask)) - clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); - } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b416995b975..8c3fef1db09 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -160,6 +160,18 @@ void tick_nohz_stop_sched_tick(void) cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); + /* + * If this cpu is offline and it is the one which updates + * jiffies, then give up the assignment and let it be taken by + * the cpu which runs the tick timer next. If we don't drop + * this here the jiffies might be stale and do_timer() never + * invoked. + */ + if (unlikely(!cpu_online(cpu))) { + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = -1; + } + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) goto end; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index acc417b5a9b..4ad79f6bdec 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -217,6 +217,7 @@ static void change_clocksource(void) } #else static inline void change_clocksource(void) { } +static inline s64 __get_nsec_offset(void) { return 0; } #endif /** @@ -280,6 +281,8 @@ void __init timekeeping_init(void) static int timekeeping_suspended; /* time in seconds when suspend began */ static unsigned long timekeeping_suspend_time; +/* xtime offset when we went into suspend */ +static s64 timekeeping_suspend_nsecs; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. @@ -305,6 +308,8 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic.tv_sec -= sleep_length; total_sleep_time += sleep_length; } + /* Make sure that we have the correct xtime reference */ + timespec_add_ns(&xtime, timekeeping_suspend_nsecs); /* re-base the last cycle value */ clock->cycle_last = clocksource_read(clock); clock->error = 0; @@ -325,9 +330,12 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + timekeeping_suspend_time = read_persistent_clock(); + write_seqlock_irqsave(&xtime_lock, flags); + /* Get the current xtime offset */ + timekeeping_suspend_nsecs = __get_nsec_offset(); timekeeping_suspended = 1; - timekeeping_suspend_time = read_persistent_clock(); write_sequnlock_irqrestore(&xtime_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); diff --git a/kernel/user.c b/kernel/user.c index e7d11cef699..9ca2848fc35 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -55,25 +55,22 @@ struct user_struct root_user = { /* * These routines must be called with the uidhash spinlock held! */ -static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent) +static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent) { - list_add(&up->uidhash_list, hashent); + hlist_add_head(&up->uidhash_node, hashent); } static inline void uid_hash_remove(struct user_struct *up) { - list_del(&up->uidhash_list); + hlist_del_init(&up->uidhash_node); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) { - struct list_head *up; - - list_for_each(up, hashent) { - struct user_struct *user; - - user = list_entry(up, struct user_struct, uidhash_list); + struct user_struct *user; + struct hlist_node *h; + hlist_for_each_entry(user, h, hashent, uidhash_node) { if(user->uid == uid) { atomic_inc(&user->__count); return user; @@ -122,7 +119,7 @@ void free_uid(struct user_struct *up) struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) { - struct list_head *hashent = uidhashentry(ns, uid); + struct hlist_head *hashent = uidhashentry(ns, uid); struct user_struct *up; spin_lock_irq(&uidhash_lock); @@ -202,6 +199,30 @@ void switch_uid(struct user_struct *new_user) suid_keys(current); } +void release_uids(struct user_namespace *ns) +{ + int i; + unsigned long flags; + struct hlist_head *head; + struct hlist_node *nd; + + spin_lock_irqsave(&uidhash_lock, flags); + /* + * collapse the chains so that the user_struct-s will + * be still alive, but not in hashes. subsequent free_uid() + * will free them. + */ + for (i = 0; i < UIDHASH_SZ; i++) { + head = ns->uidhash_table + i; + while (!hlist_empty(head)) { + nd = head->first; + hlist_del_init(nd); + } + } + spin_unlock_irqrestore(&uidhash_lock, flags); + + free_uid(ns->root_user); +} static int __init uid_cache_init(void) { @@ -211,7 +232,7 @@ static int __init uid_cache_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); for(n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(init_user_ns.uidhash_table + n); + INIT_HLIST_HEAD(init_user_ns.uidhash_table + n); /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 85af9422ea6..7af90fc4f0f 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -39,7 +39,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) kref_init(&ns->kref); for (n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(ns->uidhash_table + n); + INIT_HLIST_HEAD(ns->uidhash_table + n); /* Insert new root user. */ ns->root_user = alloc_uid(ns, 0); @@ -81,7 +81,7 @@ void free_user_ns(struct kref *kref) struct user_namespace *ns; ns = container_of(kref, struct user_namespace, kref); - free_uid(ns->root_user); + release_uids(ns); kfree(ns); } diff --git a/kernel/utsname.c b/kernel/utsname.c index 9d8180a0f0d..816d7b24fa0 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -28,7 +28,9 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) if (!ns) return ERR_PTR(-ENOMEM); + down_read(&uts_sem); memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); + up_read(&uts_sem); kref_init(&ns->kref); return ns; } |