From 9489424454c93f4d225d7af47978f8c7e84bf4d4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Mar 2009 22:05:12 -0600 Subject: cpumask: use mm_cpumask() wrapper: kernel/fork.c Impact: futureproof Makes code futureproof against the impending change to mm->cpu_vm_mask. It's also a chance to use the new cpumask_ ops which take a pointer. Signed-off-by: Rusty Russell --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 6715ebc3761..47c15840a38 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -284,7 +284,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->free_area_cache = oldmm->mmap_base; mm->cached_hole_size = ~0UL; mm->map_count = 0; - cpus_clear(mm->cpu_vm_mask); + cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; rb_link = &mm->mm_rb.rb_node; rb_parent = NULL; -- cgit v1.2.3-70-g09d2 From 3e93cd671813e204c258f1e6c797959920cf7772 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:00:13 -0400 Subject: Take fs_struct handling to new file (fs/fs_struct.c) Pure code move; two new helper functions for nfsd and daemonize (unshare_fs_struct() and daemonize_fs_struct() resp.; for now - the same code as used to be in callers). unshare_fs_struct() exported (for nfsd, as copy_fs_struct()/exit_fs() used to be), copy_fs_struct() and exit_fs() don't need exports anymore. 
Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/fs_struct.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 6 ++ fs/namei.c | 7 --- fs/namespace.c | 68 ---------------------- fs/nfsd/nfssvc.c | 7 +-- include/linux/fs_struct.h | 2 + kernel/exit.c | 31 +--------- kernel/fork.c | 29 +--------- 9 files changed, 155 insertions(+), 138 deletions(-) create mode 100644 fs/fs_struct.c (limited to 'kernel/fork.c') diff --git a/fs/Makefile b/fs/Makefile index 6e82a307bcd..b5cd8e18dd9 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ - stack.o + stack.o fs_struct.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o diff --git a/fs/fs_struct.c b/fs/fs_struct.c new file mode 100644 index 00000000000..36e0a123bbf --- /dev/null +++ b/fs/fs_struct.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include + +/* + * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. + * It can block. + */ +void set_fs_root(struct fs_struct *fs, struct path *path) +{ + struct path old_root; + + write_lock(&fs->lock); + old_root = fs->root; + fs->root = *path; + path_get(path); + write_unlock(&fs->lock); + if (old_root.dentry) + path_put(&old_root); +} + +/* + * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. + * It can block. 
+ */ +void set_fs_pwd(struct fs_struct *fs, struct path *path) +{ + struct path old_pwd; + + write_lock(&fs->lock); + old_pwd = fs->pwd; + fs->pwd = *path; + path_get(path); + write_unlock(&fs->lock); + + if (old_pwd.dentry) + path_put(&old_pwd); +} + +void chroot_fs_refs(struct path *old_root, struct path *new_root) +{ + struct task_struct *g, *p; + struct fs_struct *fs; + int count = 0; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_lock(p); + fs = p->fs; + if (fs) { + write_lock(&fs->lock); + if (fs->root.dentry == old_root->dentry + && fs->root.mnt == old_root->mnt) { + path_get(new_root); + fs->root = *new_root; + count++; + } + if (fs->pwd.dentry == old_root->dentry + && fs->pwd.mnt == old_root->mnt) { + path_get(new_root); + fs->pwd = *new_root; + count++; + } + write_unlock(&fs->lock); + } + task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + while (count--) + path_put(old_root); +} + +void put_fs_struct(struct fs_struct *fs) +{ + /* No need to hold fs->lock if we are killing it */ + if (atomic_dec_and_test(&fs->count)) { + path_put(&fs->root); + path_put(&fs->pwd); + kmem_cache_free(fs_cachep, fs); + } +} + +void exit_fs(struct task_struct *tsk) +{ + struct fs_struct * fs = tsk->fs; + + if (fs) { + task_lock(tsk); + tsk->fs = NULL; + task_unlock(tsk); + put_fs_struct(fs); + } +} + +struct fs_struct *copy_fs_struct(struct fs_struct *old) +{ + struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); + /* We don't need to lock fs - think why ;-) */ + if (fs) { + atomic_set(&fs->count, 1); + rwlock_init(&fs->lock); + fs->umask = old->umask; + read_lock(&old->lock); + fs->root = old->root; + path_get(&old->root); + fs->pwd = old->pwd; + path_get(&old->pwd); + read_unlock(&old->lock); + } + return fs; +} + +int unshare_fs_struct(void) +{ + struct fs_struct *fsp = copy_fs_struct(current->fs); + if (!fsp) + return -ENOMEM; + exit_fs(current); + current->fs = fsp; + return 0; +} +EXPORT_SYMBOL_GPL(unshare_fs_struct); 
+ +/* to be mentioned only in INIT_TASK */ +struct fs_struct init_fs = { + .count = ATOMIC_INIT(1), + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, +}; + +void daemonize_fs_struct(void) +{ + struct fs_struct *fs; + + exit_fs(current); /* current->fs->count--; */ + fs = &init_fs; + current->fs = fs; + atomic_inc(&fs->count); +} diff --git a/fs/internal.h b/fs/internal.h index 53af885f173..477a105f8df 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -11,6 +11,7 @@ struct super_block; struct linux_binprm; +struct path; /* * block_dev.c @@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); + +/* + * fs_struct.c + */ +extern void chroot_fs_refs(struct path *, struct path *); diff --git a/fs/namei.c b/fs/namei.c index d040ce11785..4c65a646013 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2897,10 +2897,3 @@ EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); - -/* to be mentioned only in INIT_TASK */ -struct fs_struct init_fs = { - .count = ATOMIC_INIT(1), - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), - .umask = 0022, -}; diff --git a/fs/namespace.c b/fs/namespace.c index f7ec283ccfb..1e56303c718 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2092,74 +2092,6 @@ out1: return retval; } -/* - * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. - */ -void set_fs_root(struct fs_struct *fs, struct path *path) -{ - struct path old_root; - - write_lock(&fs->lock); - old_root = fs->root; - fs->root = *path; - path_get(path); - write_unlock(&fs->lock); - if (old_root.dentry) - path_put(&old_root); -} - -/* - * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. 
- */ -void set_fs_pwd(struct fs_struct *fs, struct path *path) -{ - struct path old_pwd; - - write_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - write_unlock(&fs->lock); - - if (old_pwd.dentry) - path_put(&old_pwd); -} - -static void chroot_fs_refs(struct path *old_root, struct path *new_root) -{ - struct task_struct *g, *p; - struct fs_struct *fs; - int count = 0; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - task_lock(p); - fs = p->fs; - if (fs) { - write_lock(&fs->lock); - if (fs->root.dentry == old_root->dentry - && fs->root.mnt == old_root->mnt) { - path_get(new_root); - fs->root = *new_root; - count++; - } - if (fs->pwd.dentry == old_root->dentry - && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); - fs->pwd = *new_root; - count++; - } - write_unlock(&fs->lock); - } - task_unlock(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - while (count--) - path_put(old_root); -} - /* * pivot_root Semantics: * Moves the root file system of the current process to the directory put_old, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d7baa..144d6991861 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -404,7 +404,6 @@ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; - struct fs_struct *fsp; int err, preverr = 0; /* Lock module and set up kernel thread */ @@ -413,13 +412,11 @@ nfsd(void *vrqstp) /* At this point, the thread shares current->fs * with the init process. We need to create files with a * umask of 0 instead of init's umask. 
*/ - fsp = copy_fs_struct(current->fs); - if (!fsp) { + if (unshare_fs_struct() < 0) { printk("Unable to start nfsd thread: out of memory\n"); goto out; } - exit_fs(current); - current->fs = fsp; + current->fs->umask = 0; /* diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 18b467dbe27..298cef1c079 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -20,5 +20,7 @@ extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void put_fs_struct(struct fs_struct *); +extern void daemonize_fs_struct(void); +extern int unshare_fs_struct(void); #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 167e1e3ad7c..ad8375758a7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -429,7 +429,6 @@ EXPORT_SYMBOL(disallow_signal); void daemonize(const char *name, ...) { va_list args; - struct fs_struct *fs; sigset_t blocked; va_start(args, name); @@ -462,11 +461,7 @@ void daemonize(const char *name, ...) 
/* Become as one with the init task */ - exit_fs(current); /* current->fs->count--; */ - fs = init_task.fs; - current->fs = fs; - atomic_inc(&fs->count); - + daemonize_fs_struct(); exit_files(current); current->files = init_task.files; atomic_inc(&current->files->count); @@ -565,30 +560,6 @@ void exit_files(struct task_struct *tsk) } } -void put_fs_struct(struct fs_struct *fs) -{ - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { - path_put(&fs->root); - path_put(&fs->pwd); - kmem_cache_free(fs_cachep, fs); - } -} - -void exit_fs(struct task_struct *tsk) -{ - struct fs_struct * fs = tsk->fs; - - if (fs) { - task_lock(tsk); - tsk->fs = NULL; - task_unlock(tsk); - put_fs_struct(fs); - } -} - -EXPORT_SYMBOL_GPL(exit_fs); - #ifdef CONFIG_MM_OWNER /* * Task p is exiting and it owned mm, lets find a new owner for it diff --git a/kernel/fork.c b/kernel/fork.c index 47c15840a38..05c02dc586b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -681,38 +681,13 @@ fail_nomem: return retval; } -static struct fs_struct *__copy_fs_struct(struct fs_struct *old) -{ - struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); - /* We don't need to lock fs - think why ;-) */ - if (fs) { - atomic_set(&fs->count, 1); - rwlock_init(&fs->lock); - fs->umask = old->umask; - read_lock(&old->lock); - fs->root = old->root; - path_get(&old->root); - fs->pwd = old->pwd; - path_get(&old->pwd); - read_unlock(&old->lock); - } - return fs; -} - -struct fs_struct *copy_fs_struct(struct fs_struct *old) -{ - return __copy_fs_struct(old); -} - -EXPORT_SYMBOL_GPL(copy_fs_struct); - static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { if (clone_flags & CLONE_FS) { atomic_inc(&current->fs->count); return 0; } - tsk->fs = __copy_fs_struct(current->fs); + tsk->fs = copy_fs_struct(current->fs); if (!tsk->fs) return -ENOMEM; return 0; @@ -1545,7 +1520,7 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) if ((unshare_flags & 
CLONE_FS) && (fs && atomic_read(&fs->count) > 1)) { - *new_fsp = __copy_fs_struct(current->fs); + *new_fsp = copy_fs_struct(current->fs); if (!*new_fsp) return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 498052bba55ecaff58db6a1436b0e25bfd75a7ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Mar 2009 07:20:30 -0400 Subject: New locking/refcounting for fs_struct * all changes of current->fs are done under task_lock and write_lock of old fs->lock * refcount is not atomic anymore (same protection) * its decrements are done when removing reference from current; at the same time we decide whether to free it. * put_fs_struct() is gone * new field - ->in_exec. Set by check_unsafe_exec() if we are trying to do execve() and only subthreads share fs_struct. Cleared when finishing exec (success and failure alike). Makes CLONE_FS fail with -EAGAIN if set. * check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread is in progress. Signed-off-by: Al Viro --- fs/compat.c | 16 +++++++++-- fs/exec.c | 31 +++++++++++++++++---- fs/fs_struct.c | 69 +++++++++++++++++++++++++++++++++-------------- fs/internal.h | 2 +- fs/proc/task_nommu.c | 2 +- include/linux/fs_struct.h | 8 +++--- kernel/fork.c | 37 ++++++++++++++++++------- 7 files changed, 121 insertions(+), 44 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/compat.c b/fs/compat.c index 55efdfebdf5..baabf203b84 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1441,12 +1442,15 @@ int compat_do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + + retval = check_unsafe_exec(bprm); + if (retval) + goto out_unlock; file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_unlock; + goto out_unmark; sched_exec(); @@ -1488,6 +1492,9 @@ int compat_do_execve(char * filename, goto out; /* execve succeeded */ + write_lock(&current->fs->lock); + 
current->fs->in_exec = 0; + write_unlock(&current->fs->lock); current->in_execve = 0; mutex_unlock(&current->cred_exec_mutex); acct_update_integrals(current); @@ -1506,6 +1513,11 @@ out_file: fput(bprm->file); } +out_unmark: + write_lock(&current->fs->lock); + current->fs->in_exec = 0; + write_unlock(&current->fs->lock); + out_unlock: current->in_execve = 0; mutex_unlock(&current->cred_exec_mutex); diff --git a/fs/exec.c b/fs/exec.c index c5128fbc916..07a059664b7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1056,16 +1056,18 @@ EXPORT_SYMBOL(install_exec_creds); * - the caller must hold current->cred_exec_mutex to protect against * PTRACE_ATTACH */ -void check_unsafe_exec(struct linux_binprm *bprm) +int check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; unsigned long flags; unsigned n_fs, n_sighand; + int res = 0; bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; n_sighand = 1; + write_lock(&p->fs->lock); lock_task_sighand(p, &flags); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1073,11 +1075,19 @@ void check_unsafe_exec(struct linux_binprm *bprm) n_sighand++; } - if (atomic_read(&p->fs->count) > n_fs || - atomic_read(&p->sighand->count) > n_sighand) + if (p->fs->users > n_fs || + atomic_read(&p->sighand->count) > n_sighand) { bprm->unsafe |= LSM_UNSAFE_SHARE; + } else { + if (p->fs->in_exec) + res = -EAGAIN; + p->fs->in_exec = 1; + } unlock_task_sighand(p, &flags); + write_unlock(&p->fs->lock); + + return res; } /* @@ -1296,12 +1306,15 @@ int do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + + retval = check_unsafe_exec(bprm); + if (retval) + goto out_unlock; file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_unlock; + goto out_unmark; sched_exec(); @@ -1344,6 +1357,9 @@ int do_execve(char * filename, goto out; /* execve succeeded */ + write_lock(&current->fs->lock); + current->fs->in_exec = 0; + write_unlock(&current->fs->lock); 
current->in_execve = 0; mutex_unlock(&current->cred_exec_mutex); acct_update_integrals(current); @@ -1362,6 +1378,11 @@ out_file: fput(bprm->file); } +out_unmark: + write_lock(&current->fs->lock); + current->fs->in_exec = 0; + write_unlock(&current->fs->lock); + out_unlock: current->in_execve = 0; mutex_unlock(&current->cred_exec_mutex); diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 36e0a123bbf..41cff72b377 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) path_put(old_root); } -void put_fs_struct(struct fs_struct *fs) +void free_fs_struct(struct fs_struct *fs) { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { - path_put(&fs->root); - path_put(&fs->pwd); - kmem_cache_free(fs_cachep, fs); - } + path_put(&fs->root); + path_put(&fs->pwd); + kmem_cache_free(fs_cachep, fs); } void exit_fs(struct task_struct *tsk) { - struct fs_struct * fs = tsk->fs; + struct fs_struct *fs = tsk->fs; if (fs) { + int kill; task_lock(tsk); + write_lock(&fs->lock); tsk->fs = NULL; + kill = !--fs->users; + write_unlock(&fs->lock); task_unlock(tsk); - put_fs_struct(fs); + if (kill) + free_fs_struct(fs); } } @@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); /* We don't need to lock fs - think why ;-) */ if (fs) { - atomic_set(&fs->count, 1); + fs->users = 1; + fs->in_exec = 0; rwlock_init(&fs->lock); fs->umask = old->umask; read_lock(&old->lock); @@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) int unshare_fs_struct(void) { - struct fs_struct *fsp = copy_fs_struct(current->fs); - if (!fsp) + struct fs_struct *fs = current->fs; + struct fs_struct *new_fs = copy_fs_struct(fs); + int kill; + + if (!new_fs) return -ENOMEM; - exit_fs(current); - current->fs = fsp; + + task_lock(current); + write_lock(&fs->lock); + kill = !--fs->users; + current->fs = new_fs; + 
write_unlock(&fs->lock); + task_unlock(current); + + if (kill) + free_fs_struct(fs); + return 0; } EXPORT_SYMBOL_GPL(unshare_fs_struct); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { - .count = ATOMIC_INIT(1), + .users = 1, .lock = __RW_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; void daemonize_fs_struct(void) { - struct fs_struct *fs; + struct fs_struct *fs = current->fs; + + if (fs) { + int kill; + + task_lock(current); - exit_fs(current); /* current->fs->count--; */ - fs = &init_fs; - current->fs = fs; - atomic_inc(&fs->count); + write_lock(&init_fs.lock); + init_fs.users++; + write_unlock(&init_fs.lock); + + write_lock(&fs->lock); + current->fs = &init_fs; + kill = !--fs->users; + write_unlock(&fs->lock); + + task_unlock(current); + if (kill) + free_fs_struct(fs); + } } diff --git a/fs/internal.h b/fs/internal.h index 477a105f8df..b4dac4fb6b6 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -44,7 +44,7 @@ extern void __init chrdev_init(void); /* * exec.c */ -extern void check_unsafe_exec(struct linux_binprm *); +extern int check_unsafe_exec(struct linux_binprm *); /* * namespace.c diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 343ea1216bc..6ca01052c5b 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) else bytes += kobjsize(mm); - if (current->fs && atomic_read(&current->fs->count) > 1) + if (current->fs && current->fs->users > 1) sbytes += kobjsize(current->fs); else bytes += kobjsize(current->fs); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 298cef1c079..78a05bfcd8e 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -4,12 +4,10 @@ #include struct fs_struct { - atomic_t count; /* This usage count is used by check_unsafe_exec() for - * security checking purposes - therefore it may not be - * incremented, except by clone(CLONE_FS). 
- */ + int users; rwlock_t lock; int umask; + int in_exec; struct path root, pwd; }; @@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); -extern void put_fs_struct(struct fs_struct *); +extern void free_fs_struct(struct fs_struct *); extern void daemonize_fs_struct(void); extern int unshare_fs_struct(void); diff --git a/kernel/fork.c b/kernel/fork.c index 05c02dc586b..51f138a131d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -683,11 +683,19 @@ fail_nomem: static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { + struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { - atomic_inc(&current->fs->count); + /* tsk->fs is already what we want */ + write_lock(&fs->lock); + if (fs->in_exec) { + write_unlock(&fs->lock); + return -EAGAIN; + } + fs->users++; + write_unlock(&fs->lock); return 0; } - tsk->fs = copy_fs_struct(current->fs); + tsk->fs = copy_fs_struct(fs); if (!tsk->fs) return -ENOMEM; return 0; @@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) { struct fs_struct *fs = current->fs; - if ((unshare_flags & CLONE_FS) && - (fs && atomic_read(&fs->count) > 1)) { - *new_fsp = copy_fs_struct(current->fs); - if (!*new_fsp) - return -ENOMEM; - } + if (!(unshare_flags & CLONE_FS) || !fs) + return 0; + + /* don't need lock here; in the worst case we'll do useless copy */ + if (fs->users == 1) + return 0; + + *new_fsp = copy_fs_struct(fs); + if (!*new_fsp) + return -ENOMEM; return 0; } @@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; + write_lock(&fs->lock); current->fs = new_fs; - new_fs = fs; + if (--fs->users) + new_fs = NULL; + else + new_fs = fs; + write_unlock(&fs->lock); } if (new_mm) { @@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh: 
bad_unshare_cleanup_fs: if (new_fs) - put_fs_struct(new_fs); + free_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: -- cgit v1.2.3-70-g09d2 From 5ad4e53bd5406ee214ddc5a41f03f779b8b2d526 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:50:06 -0400 Subject: Get rid of indirect include of fs_struct.h Don't pull it in sched.h; very few files actually need it and those can include directly. sched.h itself only needs forward declaration of struct fs_struct; Signed-off-by: Al Viro --- arch/cris/kernel/process.c | 1 - fs/dcache.c | 1 + fs/exec.c | 1 + fs/fs_struct.c | 1 + fs/namei.c | 1 + fs/namespace.c | 1 + fs/open.c | 1 + fs/proc/base.c | 1 + fs/proc/task_nommu.c | 1 + include/linux/mnt_namespace.h | 2 ++ include/linux/nsproxy.h | 1 + include/linux/sched.h | 3 ++- init/do_mounts.c | 1 + kernel/auditsc.c | 1 + kernel/exec_domain.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sys.c | 1 + security/tomoyo/realpath.c | 1 + 19 files changed, 20 insertions(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 60816e87645..4df0b320d52 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/dcache.c b/fs/dcache.c index 90bbd7e1b11..0dc4de21f08 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; diff --git a/fs/exec.c b/fs/exec.c index 614991bf0c8..052a961e41a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 6ac21933867..eee059052db 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -3,6 +3,7 @@ #include #include #include +#include /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. 
diff --git a/fs/namei.c b/fs/namei.c index 964c0249444..b8433ebfae0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) diff --git a/fs/namespace.c b/fs/namespace.c index 1e56303c718..c6f54e4c429 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "pnode.h" diff --git a/fs/open.c b/fs/open.c index 75b61677daa..377eb25b6ab 100644 --- a/fs/open.c +++ b/fs/open.c @@ -29,6 +29,7 @@ #include #include #include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { diff --git a/fs/proc/base.c b/fs/proc/base.c index e0afd326b68..f71559784bf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -80,6 +80,7 @@ #include #include #include +#include #include "internal.h" /* NOTE: diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 6ca01052c5b..253afc04484 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 830bbcd449d..3a059298cc1 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -22,6 +22,8 @@ struct proc_mounts { int event; }; +struct fs_struct; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index afad7dec1b3..7b370c7cfef 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct fs_struct; /* * A structure to contain pointers to all per-process diff --git a/include/linux/sched.h b/include/linux/sched.h index 29df6374d2d..b4e065ea0de 100644 --- a/include/linux/sched.h +++ 
b/include/linux/sched.h @@ -68,7 +68,7 @@ struct sched_param { #include #include #include -#include +#include #include #include #include @@ -97,6 +97,7 @@ struct futex_pi_state; struct robust_list_head; struct bio; struct bts_tracer; +struct fs_struct; /* * List of flags we want to share for kernel threads, diff --git a/init/do_mounts.c b/init/do_mounts.c index 8d4ff5afc1d..dd7ee5f203f 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8cbddff6c28..2bfc6478676 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "audit.h" diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index cb8e9626c21..c35452cadde 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -18,6 +18,7 @@ #include #include #include +#include static void default_handler(int, struct pt_regs *); diff --git a/kernel/exit.c b/kernel/exit.c index ad8375758a7..b5d656845c9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 51f138a131d..e82a14577a9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 37f458e6882..ce182aaed20 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index d47f16b844b..3bbe01a7a4b 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "common.h" #include "realpath.h" -- cgit v1.2.3-70-g09d2 From 33e5d76979cf01e3834814fe0aea569d1d602c1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Apr 2009 16:56:32 -0700 Subject: nommu: fix a 
number of issues with the per-MM VMA patch Fix a number of issues with the per-MM VMA patch: (1) Make mmap_pages_allocated an atomic_long_t, just in case this is used on a NOMMU system with more than 2G pages. Makes no difference on a 32-bit system. (2) Report vma->vm_pgoff * PAGE_SIZE as a 64-bit value, not a 32-bit value, lest it overflow. (3) Move the allocation of the vm_area_struct slab back for fork.c. (4) Use KMEM_CACHE() for both vm_area_struct and vm_region slabs. (5) Use BUG_ON() rather than if () BUG(). (6) Make the default validate_nommu_regions() a static inline rather than a #define. (7) Make free_page_series()'s objection to pages with a refcount != 1 more informative. (8) Adjust the __put_nommu_region() banner comment to indicate that the semaphore must be held for writing. (9) Limit the number of warnings about munmaps of non-mmapped regions. Reported-by: Andrew Morton Signed-off-by: David Howells Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- fs/proc/task_nommu.c | 4 ++-- include/linux/mm.h | 2 +- kernel/fork.c | 1 + mm/mmap.c | 3 --- mm/nommu.c | 52 +++++++++++++++++++++++++--------------------------- 6 files changed, 30 insertions(+), 34 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 43d23948384..74ea974f5ca 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -120,7 +120,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeram-i.freehigh), #endif #ifndef CONFIG_MMU - K((unsigned long) atomic_read(&mmap_pages_allocated)), + K((unsigned long) atomic_long_read(&mmap_pages_allocated)), #endif K(i.totalswap), K(i.freeswap), diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 343ea1216bc..370be0a2c90 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -136,14 +136,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) } seq_printf(m, - "%08lx-%08lx %c%c%c%c %08lx %02x:%02x 
%lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', - vma->vm_pgoff << PAGE_SHIFT, + (unsigned long long) vma->vm_pgoff << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); if (file) { diff --git a/include/linux/mm.h b/include/linux/mm.h index aeabe953ba4..bff1f0d475c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1079,7 +1079,7 @@ static inline void setup_per_cpu_pageset(void) {} #endif /* nommu.c */ -extern atomic_t mmap_pages_allocated; +extern atomic_long_t mmap_pages_allocated; /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); diff --git a/kernel/fork.c b/kernel/fork.c index 47c15840a38..51d1aa21483 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1488,6 +1488,7 @@ void __init proc_caches_init(void) mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); mmap_init(); } diff --git a/mm/mmap.c b/mm/mmap.c index 1abb9185a68..4a3841186c1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2481,7 +2481,4 @@ void mm_drop_all_locks(struct mm_struct *mm) */ void __init mmap_init(void) { - vm_area_cachep = kmem_cache_create("vm_area_struct", - sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL); } diff --git a/mm/nommu.c b/mm/nommu.c index 2fcf47d449b..72eda4aee2c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -69,7 +69,7 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = 1; /* page trimming behaviour */ int heap_stack_gap = 0; -atomic_t mmap_pages_allocated; +atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(num_physpages); @@ -463,12 +463,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ void __init mmap_init(void) { - vm_region_jar = 
kmem_cache_create("vm_region_jar", - sizeof(struct vm_region), 0, - SLAB_PANIC, NULL); - vm_area_cachep = kmem_cache_create("vm_area_struct", - sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL); + vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } /* @@ -486,27 +481,24 @@ static noinline void validate_nommu_regions(void) return; last = rb_entry(lastp, struct vm_region, vm_rb); - if (unlikely(last->vm_end <= last->vm_start)) - BUG(); - if (unlikely(last->vm_top < last->vm_end)) - BUG(); + BUG_ON(unlikely(last->vm_end <= last->vm_start)); + BUG_ON(unlikely(last->vm_top < last->vm_end)); while ((p = rb_next(lastp))) { region = rb_entry(p, struct vm_region, vm_rb); last = rb_entry(lastp, struct vm_region, vm_rb); - if (unlikely(region->vm_end <= region->vm_start)) - BUG(); - if (unlikely(region->vm_top < region->vm_end)) - BUG(); - if (unlikely(region->vm_start < last->vm_top)) - BUG(); + BUG_ON(unlikely(region->vm_end <= region->vm_start)); + BUG_ON(unlikely(region->vm_top < region->vm_end)); + BUG_ON(unlikely(region->vm_start < last->vm_top)); lastp = p; } } #else -#define validate_nommu_regions() do {} while(0) +static void validate_nommu_regions(void) +{ +} #endif /* @@ -563,16 +555,17 @@ static void free_page_series(unsigned long from, unsigned long to) struct page *page = virt_to_page(from); kdebug("- free %lx", from); - atomic_dec(&mmap_pages_allocated); + atomic_long_dec(&mmap_pages_allocated); if (page_count(page) != 1) - kdebug("free page %p [%d]", page, page_count(page)); + kdebug("free page %p: refcount not one: %d", + page, page_count(page)); put_page(page); } } /* * release a reference to a region - * - the caller must hold the region semaphore, which this releases + * - the caller must hold the region semaphore for writing, which this releases * - the region may not have been added to the tree yet, in which case vm_top * will equal vm_start */ @@ -1096,7 +1089,7 @@ static int do_mmap_private(struct vm_area_struct *vma, goto enomem; total = 1 << 
order; - atomic_add(total, &mmap_pages_allocated); + atomic_long_add(total, &mmap_pages_allocated); point = rlen >> PAGE_SHIFT; @@ -1107,7 +1100,7 @@ static int do_mmap_private(struct vm_area_struct *vma, order = ilog2(total - point); n = 1 << order; kdebug("shave %lu/%lu @%lu", n, total - point, total); - atomic_sub(n, &mmap_pages_allocated); + atomic_long_sub(n, &mmap_pages_allocated); total -= n; set_page_refcounted(pages + total); __free_pages(pages + total, order); @@ -1536,10 +1529,15 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) /* find the first potentially overlapping VMA */ vma = find_vma(mm, start); if (!vma) { - printk(KERN_WARNING - "munmap of memory not mmapped by process %d (%s):" - " 0x%lx-0x%lx\n", - current->pid, current->comm, start, start + len - 1); + static int limit = 0; + if (limit < 5) { + printk(KERN_WARNING + "munmap of memory not mmapped by process %d" + " (%s): 0x%lx-0x%lx\n", + current->pid, current->comm, + start, start + len - 1); + limit++; + } return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 6f2c55b843836d26528c56a0968689accaedbc67 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 2 Apr 2009 16:56:59 -0700 Subject: Simplify copy_thread() First argument unused since 2.3.11. 
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/process.c | 2 +- arch/arm/kernel/process.c | 2 +- arch/avr32/kernel/process.c | 2 +- arch/blackfin/kernel/process.c | 2 +- arch/cris/arch-v10/kernel/process.c | 2 +- arch/cris/arch-v32/kernel/process.c | 2 +- arch/frv/kernel/process.c | 2 +- arch/h8300/kernel/process.c | 2 +- arch/ia64/kernel/process.c | 2 +- arch/m32r/kernel/process.c | 2 +- arch/m68k/kernel/process.c | 2 +- arch/m68knommu/kernel/process.c | 2 +- arch/mips/kernel/process.c | 2 +- arch/mn10300/kernel/process.c | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/s390/kernel/process.c | 2 +- arch/sh/kernel/process_32.c | 2 +- arch/sh/kernel/process_64.c | 2 +- arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/process_64.c | 2 +- arch/um/kernel/process.c | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/xtensa/kernel/process.c | 2 +- include/linux/sched.h | 3 ++- kernel/fork.c | 2 +- 27 files changed, 28 insertions(+), 27 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 8d0097f1020..3a2fb7a02db 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -272,7 +272,7 @@ alpha_vfork(struct pt_regs *regs) */ int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 2de14e2afdc..c3265a2e7cd 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -301,7 +301,7 @@ void release_thread(struct task_struct *dead_task) asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int -copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, 
+copy_thread(unsigned long clone_flags, unsigned long stack_start, unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs) { struct thread_info *thread = task_thread_info(p); diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 43ae555ecb3..1bbe1da5486 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -332,7 +332,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 33e2e8993f7..f49427293ca 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -193,7 +193,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs) } int -copy_thread(int nr, unsigned long clone_flags, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index bd9b3ff63f6..c4c69cf721e 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -115,7 +115,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) */ asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index ced5b725d9b..120e7f796fe 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -131,7 +131,7 @@ kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) extern asmlinkage void ret_from_fork(void); 
int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 9583a338e9d..0de50df7497 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -204,7 +204,7 @@ void prepare_to_copy(struct task_struct *tsk) /* * set up the kernel stack and exception frames for a new process */ -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index a8ef654a5a0..e2f33d0f996 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -191,7 +191,7 @@ asmlinkage int h8300_clone(struct pt_regs *regs) } -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index c5716270514..5d7c0e5b9e7 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -413,7 +413,7 @@ ia64_load_extra (struct task_struct *task) * so there is nothing to worry about. */ int -copy_thread (int nr, unsigned long clone_flags, +copy_thread(unsigned long clone_flags, unsigned long user_stack_base, unsigned long user_stack_size, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 7103d91e1a2..3e876f0baeb 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -225,7 +225,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) return 0; /* Task didn't use the fpu at all. 
*/ } -int copy_thread(int nr, unsigned long clone_flags, unsigned long spu, +int copy_thread(unsigned long clone_flags, unsigned long spu, unsigned long unused, struct task_struct *tsk, struct pt_regs *regs) { struct pt_regs *childregs = task_pt_regs(tsk); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 632ce016014..ec37fb56c12 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -233,7 +233,7 @@ asmlinkage int m68k_clone(struct pt_regs *regs) parent_tidptr, child_tidptr); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 3f2d7745f31..1e96c6eb631 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -199,7 +199,7 @@ asmlinkage int m68k_clone(struct pt_regs *regs) return do_fork(clone_flags, newsp, regs, 0, NULL, NULL); } -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index ca2e4026ad2..1eaaa450e20 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -99,7 +99,7 @@ void flush_thread(void) { } -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti = task_thread_info(p); diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index b28c9a60445..234cf344cdc 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -193,7 +193,7 @@ void prepare_to_copy(struct task_struct *tsk) * set up the kernel stack for a new 
thread and copy arch-specific thread * control information */ -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long c_usp, unsigned long ustk_size, struct task_struct *p, struct pt_regs *kregs) { diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index b80e02a4d81..8aa591ed912 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -263,7 +263,7 @@ sys_vfork(struct pt_regs *regs) } int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, /* in ia64 this is "user_stack_size" */ struct task_struct * p, struct pt_regs * pregs) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index eac06494878..7b44a33f03c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -598,7 +598,7 @@ void prepare_to_copy(struct task_struct *tsk) /* * Copy a thread.. */ -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b48e961a38f..a3acd8e60af 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -160,7 +160,7 @@ void release_thread(struct task_struct *dead_task) { } -int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, +int copy_thread(unsigned long clone_flags, unsigned long new_stackp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index ddafbbbab2a..694bc15f84f 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -170,7 +170,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned 
long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index c90c7e5e5fe..96be839040f 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -425,7 +425,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index f4bee35a1b4..2830b415e21 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -455,7 +455,7 @@ asmlinkage int sparc_do_fork(unsigned long clone_flags, */ extern void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index a73954b87f0..4041f94e772 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -561,7 +561,7 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags, * Parent --> %o0 == childs pid, %o1 == 0 * Child --> %o0 == parents pid, %o1 == 1 */ -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index a1c6d07cac3..4a28a1568d8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -179,7 +179,7 @@ void fork_handler(void) userspace(&current->thread.regs.regs); } -int copy_thread(int nr, unsigned long
clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct * p, struct pt_regs *regs) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 14014d766ca..76f8f84043a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -245,7 +245,7 @@ void prepare_to_copy(struct task_struct *tsk) unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index abb7e6a7f0c..b751a41392b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -278,7 +278,7 @@ void prepare_to_copy(struct task_struct *tsk) unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 9185597eb6a..031f3668571 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -172,7 +172,7 @@ void prepare_to_copy(struct task_struct *tsk) * childregs. 
*/ -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 481fad3a9b4..9186f8c5d5f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1975,7 +1975,8 @@ extern void mm_release(struct task_struct *, struct mm_struct *); /* Allocate a new mm structure and copy contents from tsk->mm */ extern struct mm_struct *dup_mm(struct task_struct *tsk); -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *, struct pt_regs *); extern void flush_thread(void); extern void exit_thread(void); diff --git a/kernel/fork.c b/kernel/fork.c index 51d1aa21483..d7eb727eb53 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1125,7 +1125,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; - retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); + retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; -- cgit v1.2.3-70-g09d2 From b3bfa0cba867f23365b81658b47efd906830879b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 2 Apr 2009 16:58:08 -0700 Subject: signals: protect cinit from blocked fatal signals Normally SIG_DFL signals to global and container-init are dropped early. But if a signal is blocked when it is posted, we cannot drop the signal since the receiver may install a handler before unblocking the signal. Once this signal is queued however, the receiver container-init has no way of knowing if the signal was sent from an ancestor or descendant namespace. 
This patch ensures that container-init drops all SIG_DFL signals in get_signal_to_deliver() except SIGKILL/SIGSTOP. If SIGSTOP/SIGKILL originate from a descendant of container-init they are never queued (i.e., dropped in sig_ignored() in an earlier patch). If SIGSTOP/SIGKILL originate from parent namespace, the signal is queued and container-init processes the signal. IOW, if get_signal_to_deliver() sees a sig_kernel_only() signal for global or container-init, the signal must have been generated internally or must have come from an ancestor ns and we process the signal. Further, the signal_group_exit() check was needed to cover the case of a multi-threaded init sending SIGKILL to other threads when doing an exit() or exec(). But since the new sig_kernel_only() check covers the SIGKILL, the signal_group_exit() check is no longer needed and can be removed. Finally, now that we have all pieces in place, set SIGNAL_UNKILLABLE for container-inits. Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. Biederman" Cc: Daniel Lezcano Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 ++ kernel/signal.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index d7eb727eb53..adbea16ec64 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -841,6 +841,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; + if (clone_flags & CLONE_NEWPID) + sig->flags |= SIGNAL_UNKILLABLE; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; diff --git a/kernel/signal.c b/kernel/signal.c index fb19aae2363..ba3da25f0ee 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1870,9 +1870,16 @@ relock: /* * Global init gets no signals it doesn't want. + * Container-init gets no signals it doesn't want from same + * container.
+ * + * Note that if global/container-init sees a sig_kernel_only() + * signal here, the signal must have been generated internally + * or must have come from an ancestor namespace. In either + * case, the signal cannot be dropped. */ if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && - !signal_group_exit(signal)) + !sig_kernel_only(signr)) continue; if (sig_kernel_stop(signr)) { -- cgit v1.2.3-70-g09d2 From 1b0f7ffd0ea27cd3a0b9ca04e3df9522048c32a3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:39 -0700 Subject: pids: kill signal_struct-> __pgrp/__session and friends We are wasting 2 words in signal_struct without any reason to implement task_pgrp_nr() and task_session_nr(). task_session_nr() has no callers since 2e2ba22ea4fd4bb85f0fa37c521066db6775cbef, we can remove it. task_pgrp_nr() is still (I believe wrongly) used in fs/autofsX and fs/coda. This patch reimplements task_pgrp_nr() via task_pgrp_nr_ns(), and kills __pgrp/__session and the related helpers. The change in drivers/char/tty_io.c is cosmetic, but hopefully makes sense anyway. 
Signed-off-by: Oleg Nesterov Acked-by: Alan Cox [tty parts] Cc: Cedric Le Goater Cc: Dave Hansen Cc: Eric Biederman Cc: Pavel Emelyanov Cc: Serge Hallyn Cc: Sukadev Bhattiprolu Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 4 ++-- include/linux/sched.h | 43 ++++++------------------------------------- kernel/exit.c | 10 +++------- kernel/fork.c | 2 -- kernel/sys.c | 4 +--- 5 files changed, 12 insertions(+), 51 deletions(-) (limited to 'kernel/fork.c') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index a44b701c5bb..66b99a2049e 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2681,7 +2681,7 @@ void __do_SAK(struct tty_struct *tty) /* Kill the entire session */ do_each_pid_task(session, PIDTYPE_SID, p) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): task_session_nr(p)==tty->session\n", + " (%s): task_session(p)==tty->session\n", task_pid_nr(p), p->comm); send_sig(SIGKILL, p, 1); } while_each_pid_task(session, PIDTYPE_SID, p); @@ -2691,7 +2691,7 @@ void __do_SAK(struct tty_struct *tty) do_each_thread(g, p) { if (p->signal->tty == tty) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): task_session_nr(p)==tty->session\n", + " (%s): task_session(p)==tty->session\n", task_pid_nr(p), p->comm); send_sig(SIGKILL, p, 1); continue; diff --git a/include/linux/sched.h b/include/linux/sched.h index 49df878a0ca..206ac003e8c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -547,25 +547,8 @@ struct signal_struct { struct list_head cpu_timers[3]; - /* job control IDs */ - - /* - * pgrp and session fields are deprecated. 
- * use the task_session_Xnr and task_pgrp_Xnr routines below - */ - - union { - pid_t pgrp __deprecated; - pid_t __pgrp; - }; - struct pid *tty_old_pgrp; - union { - pid_t session __deprecated; - pid_t __session; - }; - /* boolean value for session group leader */ int leader; @@ -1469,16 +1452,6 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } -static inline void set_task_session(struct task_struct *tsk, pid_t session) -{ - tsk->signal->__session = session; -} - -static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp) -{ - tsk->signal->__pgrp = pgrp; -} - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1552,11 +1525,6 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return tsk->signal->__pgrp; -} - static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1569,11 +1537,6 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr(struct task_struct *tsk) -{ - return tsk->signal->__session; -} - static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1585,6 +1548,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +/* obsolete, do not use */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return task_pgrp_nr_ns(tsk, &init_pid_ns); +} + /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. 
diff --git a/kernel/exit.c b/kernel/exit.c index 384f09caf2e..3bec141c82f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -357,16 +357,12 @@ static void reparent_to_kthreadd(void) void __set_special_pids(struct pid *pid) { struct task_struct *curr = current->group_leader; - pid_t nr = pid_nr(pid); - if (task_session(curr) != pid) { + if (task_session(curr) != pid) change_pid(curr, PIDTYPE_SID, pid); - set_task_session(curr, nr); - } - if (task_pgrp(curr) != pid) { + + if (task_pgrp(curr) != pid) change_pid(curr, PIDTYPE_PGID, pid); - set_task_pgrp(curr, nr); - } } static void set_special_pids(struct pid *pid) diff --git a/kernel/fork.c b/kernel/fork.c index adbea16ec64..f7445823144 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1265,8 +1265,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); - set_task_pgrp(p, task_pgrp_nr(current)); - set_task_session(p, task_session_nr(current)); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); diff --git a/kernel/sys.c b/kernel/sys.c index 37f458e6882..742cefa527e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1013,10 +1013,8 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) if (err) goto out; - if (task_pgrp(p) != pgrp) { + if (task_pgrp(p) != pgrp) change_pid(p, PIDTYPE_PGID, pgrp); - set_task_pgrp(p, pid_nr(pgrp)); - } err = 0; out: -- cgit v1.2.3-70-g09d2