From e4ecda1b60bfd2333c12bbe71b153d3b6bdc831a Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Sat, 25 Sep 2010 11:17:22 +0200 Subject: Fix compile error in blk-exec.c for !CONFIG_DETECT_HUNG_TASK Ensure that 'sysctl_hung_task_timeout_secs' is defined even when CONFIG_DETECT_HUNG_TASK is not set. This way we can safely reference it without need for ifdefs in the code elsewhere. eg. in block/blk-exec.c Signed-off-by: Mark Lord Signed-off-by: Jens Axboe --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7d..dbafa9e34a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -336,6 +336,9 @@ extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +#else +/* Avoid need for ifdefs elsewhere in the code */ +enum { sysctl_hung_task_timeout_secs = 0 }; #endif /* Attach to any functions which should be ignored in wchan output. */ -- cgit v1.2.3-70-g09d2 From b2b5ce022acf5e9f52f7b78c5579994fdde191d4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 Oct 2010 15:24:15 +0200 Subject: sched, cgroup: Fixup broken cgroup movement Dima noticed that we fail to correct the ->vruntime of sleeping tasks when we move them between cgroups. Reported-by: Dima Zavin Signed-off-by: Peter Zijlstra Tested-by: Mike Galbraith LKML-Reference: <1287150604.29097.1513.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 8 ++++---- kernel/sched_fair.c | 25 +++++++++++++++++++------ 3 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2cca9a92f5e..be312c12978 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1073,7 +1073,7 @@ struct sched_class { struct task_struct *task); #ifdef CONFIG_FAIR_GROUP_SCHED - void (*moved_group) (struct task_struct *p, int on_rq); + void (*task_move_group) (struct task_struct *p, int on_rq); #endif }; diff --git a/kernel/sched.c b/kernel/sched.c index 5998222f901..3fe253e6a6e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8498,12 +8498,12 @@ void sched_move_task(struct task_struct *tsk) if (unlikely(running)) tsk->sched_class->put_prev_task(rq, tsk); - set_task_rq(tsk, task_cpu(tsk)); - #ifdef CONFIG_FAIR_GROUP_SCHED - if (tsk->sched_class->moved_group) - tsk->sched_class->moved_group(tsk, on_rq); + if (tsk->sched_class->task_move_group) + tsk->sched_class->task_move_group(tsk, on_rq); + else #endif + set_task_rq(tsk, task_cpu(tsk)); if (unlikely(running)) tsk->sched_class->set_curr_task(rq); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 74cccfae87a..3acc2a487c1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -3866,13 +3866,26 @@ static void set_curr_task_fair(struct rq *rq) } #ifdef CONFIG_FAIR_GROUP_SCHED -static void moved_group_fair(struct task_struct *p, int on_rq) +static void task_move_group_fair(struct task_struct *p, int on_rq) { - struct cfs_rq *cfs_rq = task_cfs_rq(p); - - update_curr(cfs_rq); + /* + * If the task was not on the rq at the time of this cgroup movement + * it must have been asleep, sleeping tasks keep their ->vruntime + * absolute on their old rq until wakeup (needed for the fair sleeper + * bonus in place_entity()). + * + * If it was on the rq, we've just 'preempted' it, which does convert + * ->vruntime to a relative base. + * + * Make sure both cases convert their relative position when migrating + * to another cgroup's rq. This does somewhat interfere with the + * fair sleeper stuff for the first placement, but who cares. + */ + if (!on_rq) + p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; + set_task_rq(p, task_cpu(p)); if (!on_rq) - place_entity(cfs_rq, &p->se, 1); + p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; } #endif @@ -3924,7 +3937,7 @@ static const struct sched_class fair_sched_class = { .get_rr_interval = get_rr_interval_fair, #ifdef CONFIG_FAIR_GROUP_SCHED - .moved_group = moved_group_fair, + .task_move_group = task_move_group_fair, #endif }; -- cgit v1.2.3-70-g09d2 From 766f9164193f6dda1497bbf3861060198421fb92 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2010 14:22:45 -0700 Subject: kernel: remove PF_FLUSHER PF_FLUSHER is only ever set, not tested, remove it. Signed-off-by: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 +- include/linux/sched.h | 1 - mm/backing-dev.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b5aae4bd0ac..9e46aec10d1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -794,7 +794,7 @@ int bdi_writeback_thread(void *data) struct backing_dev_info *bdi = wb->bdi; long pages_written; - current->flags |= PF_FLUSHER | PF_SWAPWRITE; + current->flags |= PF_SWAPWRITE; set_freezable(); wb->last_active = jiffies; diff --git a/include/linux/sched.h b/include/linux/sched.h index 56154bbb8da..393ce94e54b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1706,7 +1706,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ #define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5ad3c106606..f2eb27884ff 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -362,7 +362,7 @@ static int bdi_forker_thread(void *ptr) { struct bdi_writeback *me = ptr; - current->flags |= PF_FLUSHER | PF_SWAPWRITE; + current->flags |= PF_SWAPWRITE; set_freezable(); /* -- cgit v1.2.3-70-g09d2 From b8ed374e202e23caaf9bd77dcadc9de6447faaa8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:06 -0700 Subject: signals: annotate lock_task_sighand() lock_task_sighand() grabs sighand->siglock in case of returning non-NULL but unlock_task_sighand() releases it unconditionally. This leads sparse to complain about the lock context imbalance. Rename and wrap lock_task_sighand() using __cond_lock() macro to make sparse happy. Suggested-by: Eric Dumazet Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 ++++++++- kernel/signal.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 393ce94e54b..3ff5c8519ab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2236,9 +2236,16 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags); +#define lock_task_sighand(tsk, flags) \ +({ struct sighand_struct *__ss; \ + __cond_lock(&(tsk)->sighand->siglock, \ + (__ss = __lock_task_sighand(tsk, flags))); \ + __ss; \ +}) \ + static inline void unlock_task_sighand(struct task_struct *tsk, unsigned long *flags) { diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b..e921409b85a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) return count; } -struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) +struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) { struct sighand_struct *sighand; -- cgit v1.2.3-70-g09d2 From 9b1bf12d5d51bca178dea21b04a0805e29d60cf1 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 27 Oct 2010 15:34:08 -0700 Subject: signals: move cred_guard_mutex from task_struct to signal_struct Oleg Nesterov pointed out we have to prevent multiple-threads-inside-exec itself and we can reuse ->cred_guard_mutex for it. Yes, concurrent execve() has no worth. Let's move ->cred_guard_mutex from task_struct to signal_struct. It naturally prevent multiple-threads-inside-exec. Signed-off-by: KOSAKI Motohiro Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- fs/proc/base.c | 8 ++++---- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 7 ++++--- include/linux/tracehook.h | 2 +- kernel/cred.c | 4 +--- kernel/fork.c | 2 ++ kernel/ptrace.c | 4 ++-- 8 files changed, 21 insertions(+), 20 deletions(-) (limited to 'include/linux/sched.h') diff --git a/fs/exec.c b/fs/exec.c index 3aa75b8888a..9722909c4d8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1083,14 +1083,14 @@ EXPORT_SYMBOL(setup_new_exec); */ int prepare_bprm_creds(struct linux_binprm *bprm) { - if (mutex_lock_interruptible(¤t->cred_guard_mutex)) + if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); return -ENOMEM; } @@ -1098,7 +1098,7 @@ void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } kfree(bprm); @@ -1119,13 +1119,13 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. */ security_bprm_committed_creds(bprm); - mutex_unlock(¤t->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_guard_mutex to protect against + * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b094c1c846..f3d02ca461e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; - if (mutex_lock_killable(&task->cred_guard_mutex)) + if (mutex_lock_killable(&task->signal->cred_guard_mutex)) return NULL; mm = get_task_mm(task); @@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) mmput(mm); mm = NULL; } - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); return mm; } @@ -2354,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out_free; /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->cred_guard_mutex); + length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); if (length < 0) goto out_free; length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2fea6c8ef6b..1f8c06ce0fa 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -29,6 +29,8 @@ extern struct fs_struct init_fs; .running = 0, \ .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ }, \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } extern struct nsproxy init_nsproxy; @@ -145,8 +147,6 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .cred_guard_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ff5c8519ab..be7adb7588e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -626,6 +626,10 @@ struct signal_struct { int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ }; /* Context switch must be unlocked if interrupts are to be enabled */ @@ -1305,9 +1309,6 @@ struct task_struct { * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 10db0102a89..3a2e66d88a3 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -150,7 +150,7 @@ static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) * * Return %LSM_UNSAFE_* bits applied to an exec because of tracing. * - * @task->cred_guard_mutex is held by the caller through the do_execve(). + * @task->signal->cred_guard_mutex is held by the caller through the do_execve(). */ static inline int tracehook_unsafe_exec(struct task_struct *task) { diff --git a/kernel/cred.c b/kernel/cred.c index 9a3e22641fe..6a1aa004e37 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); /* * Prepare credentials for current to perform an execve() - * - The caller must hold current->cred_guard_mutex + * - The caller must hold ->cred_guard_mutex */ struct cred *prepare_exec_creds(void) { @@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - mutex_init(&p->cred_guard_mutex); - if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/fork.c b/kernel/fork.c index e87aaaaf513..3b159c5991b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -908,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; + mutex_init(&sig->cred_guard_mutex); + return 0; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ea7ce0215cd..99bbaa3e5b0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) * under ptrace. */ retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->cred_guard_mutex)) + if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); @@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: - mutex_unlock(&task->cred_guard_mutex); + mutex_unlock(&task->signal->cred_guard_mutex); out: return retval; } -- cgit v1.2.3-70-g09d2 From 4afeff8505cb8a38e36c1ef2bd3447c4b8f87367 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Oct 2010 17:21:58 -0400 Subject: fanotify: limit number of listeners per user fanotify currently has no limit on the number of listeners a given user can have open. This patch limits the total number of listeners per user to 128. This is the same as the inotify default limit. Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 11 ++++++++++- fs/notify/fanotify/fanotify_user.c | 11 +++++++++++ include/linux/fsnotify_backend.h | 1 + include/linux/sched.h | 3 +++ 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 85366c78cc3..60c11c306fd 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -200,10 +200,19 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, return false; } +static void fanotify_free_group_priv(struct fsnotify_group *group) +{ + struct user_struct *user; + + user = group->fanotify_data.user; + atomic_dec(&user->fanotify_listeners); + free_uid(user); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .should_send_event = fanotify_should_send_event, - .free_group_priv = NULL, + .free_group_priv = fanotify_free_group_priv, .free_event_priv = NULL, .freeing_mark = NULL, }; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f9216102b42..a7d9369482d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -18,6 +18,7 @@ #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 #define FANOTIFY_DEFAULT_MAX_MARKS 8192 +#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 extern const struct fsnotify_ops fanotify_fsnotify_ops; @@ -656,6 +657,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { struct fsnotify_group *group; int f_flags, fd; + struct user_struct *user; pr_debug("%s: flags=%d event_f_flags=%d\n", __func__, flags, event_f_flags); @@ -666,6 +668,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (flags & ~FAN_ALL_INIT_FLAGS) return -EINVAL; + user = get_current_user(); + if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { + free_uid(user); + return -EMFILE; + } + f_flags = O_RDWR | FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; @@ -677,6 +685,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if (IS_ERR(group)) return PTR_ERR(group); + group->fanotify_data.user = user; + atomic_inc(&user->fanotify_listeners); + group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS mutex_init(&group->fanotify_data.access_mutex); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 49ceed6e92b..4366f458a86 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -170,6 +170,7 @@ struct fsnotify_group { #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; + struct user_struct *user; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; diff --git a/include/linux/sched.h b/include/linux/sched.h index be7adb7588e..6f420baf37c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -672,6 +672,9 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif #ifdef CONFIG_EPOLL atomic_t epoll_watches; /* The number of file descriptors currently watched */ #endif -- cgit v1.2.3-70-g09d2