From 6b4e306aa3dc94a0545eb9279475b1ab6209a31f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 16:41:34 -0800 Subject: ns: proc files for namespace naming policy. Create files under /proc//ns/ to allow controlling the namespaces of a process. This addresses three specific problems that can make namespaces hard to work with. - Namespaces require a dedicated process to pin them in memory. - It is not possible to use a namespace unless you are the child of the original creator. - Namespaces don't have names that userspace can use to talk about them. The namespace files under /proc//ns/ can be opened and the file descriptor can be used to talk about a specific namespace, and to keep the specified namespace alive. A namespace can be kept alive by either holding the file descriptor open or bind mounting the file someplace else. aka: mount --bind /proc/self/ns/net /some/filesystem/path mount --bind /proc/self/fd/ /some/filesystem/path This allows namespaces to be named with userspace policy. It requires additional support to make use of these filedescriptors and that will be comming in the following patches. Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- include/linux/proc_fs.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux/proc_fs.h') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251..a6d2c6da5e5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -179,6 +179,8 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); +extern struct file *proc_ns_fget(int fd); + #else #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) @@ -239,6 +241,11 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) {} +static inline struct file *proc_ns_fget(int fd) +{ + return ERR_PTR(-EINVAL); +} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) @@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type) extern void kclist_add(struct kcore_list *, void *, size_t, int type); #endif +struct nsproxy; +struct proc_ns_operations { + const char *name; + int type; + void *(*get)(struct task_struct *task); + void (*put)(void *ns); + int (*install)(struct nsproxy *nsproxy, void *ns); +}; + union proc_op { int (*proc_get_link)(struct inode *, struct path *); int (*proc_read)(struct task_struct *task, char *page); @@ -268,6 +284,8 @@ struct proc_inode { struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; + void *ns; + const struct proc_ns_operations *ns_ops; struct inode vfs_inode; }; -- cgit v1.2.3-70-g09d2 From 13b6f57623bc485e116344fe91fbcb29f149242b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:14:23 -0800 Subject: ns proc: Add support for the network namespace. Implementing file descriptors for the network namespace is simple and straight forward. Acked-by: David S. Miller Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + net/core/net_namespace.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include/linux/proc_fs.h') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 6ae9f07d59e..dcbd483e991 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -16,6 +16,9 @@ static const struct proc_ns_operations *ns_entries[] = { +#ifdef CONFIG_NET_NS + &netns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index a6d2c6da5e5..62126ec6ede 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -265,6 +265,7 @@ struct proc_ns_operations { void (*put)(void *ns); int (*install)(struct nsproxy *nsproxy, void *ns); }; +extern const struct proc_ns_operations netns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3f860261c5e..bf7707e09a8 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -573,3 +573,34 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +#ifdef CONFIG_NET_NS +static void *netns_get(struct task_struct *task) +{ + struct net *net; + rcu_read_lock(); + net = get_net(task->nsproxy->net_ns); + rcu_read_unlock(); + return net; +} + +static void netns_put(void *ns) +{ + put_net(ns); +} + +static int netns_install(struct nsproxy *nsproxy, void *ns) +{ + put_net(nsproxy->net_ns); + nsproxy->net_ns = get_net(ns); + return 0; +} + +const struct proc_ns_operations netns_operations = { + .name = "net", + .type = CLONE_NEWNET, + .get = netns_get, + .put = netns_put, + .install = netns_install, +}; +#endif -- cgit v1.2.3-70-g09d2 From 34482e89a5218f0f9317abf1cfba3bb38b5c29dd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:43:27 -0800 Subject: ns proc: Add support for the uts namespace Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + kernel/utsname.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) (limited to 'include/linux/proc_fs.h') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index dcbd483e991..b017181f127 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -19,6 +19,9 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_NET_NS &netns_operations, #endif +#ifdef CONFIG_UTS_NS + &utsns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 62126ec6ede..52aa89df8a6 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -266,6 +266,7 @@ struct proc_ns_operations { int (*install)(struct nsproxy *nsproxy, void *ns); }; extern const struct proc_ns_operations netns_operations; +extern const struct proc_ns_operations utsns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/kernel/utsname.c b/kernel/utsname.c index 44646179eab..bff131b9510 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -15,6 +15,7 @@ #include #include #include +#include static struct uts_namespace *create_uts_ns(void) { @@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref) put_user_ns(ns->user_ns); kfree(ns); } + +static void *utsns_get(struct task_struct *task) +{ + struct uts_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) { + ns = nsproxy->uts_ns; + get_uts_ns(ns); + } + rcu_read_unlock(); + + return ns; +} + +static void utsns_put(void *ns) +{ + put_uts_ns(ns); +} + +static int utsns_install(struct nsproxy *nsproxy, void *ns) +{ + get_uts_ns(ns); + put_uts_ns(nsproxy->uts_ns); + nsproxy->uts_ns = ns; + return 0; +} + +const struct proc_ns_operations utsns_operations = { + .name = "uts", + .type = CLONE_NEWUTS, + .get = utsns_get, + .put = utsns_put, + .install = utsns_install, +}; + -- cgit v1.2.3-70-g09d2 From a00eaf11a223c63fbb212369d6db69ce4c55a2d1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 7 Mar 2010 18:48:39 -0800 Subject: ns proc: Add support for the ipc namespace Acked-by: Daniel Lezcano Signed-off-by: Eric W. Biederman --- fs/proc/namespaces.c | 3 +++ include/linux/proc_fs.h | 1 + ipc/namespace.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/linux/proc_fs.h') diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b017181f127..f18d6d58bf7 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -22,6 +22,9 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_UTS_NS &utsns_operations, #endif +#ifdef CONFIG_IPC_NS + &ipcns_operations, +#endif }; static const struct file_operations ns_file_operations = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 52aa89df8a6..a23f0b72a02 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -267,6 +267,7 @@ struct proc_ns_operations { }; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; +extern const struct proc_ns_operations ipcns_operations; union proc_op { int (*proc_get_link)(struct inode *, struct path *); diff --git a/ipc/namespace.c b/ipc/namespace.c index 8054c8e5faf..ce0a647869b 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" @@ -140,3 +141,39 @@ void put_ipc_ns(struct ipc_namespace *ns) free_ipc_ns(ns); } } + +static void *ipcns_get(struct task_struct *task) +{ + struct ipc_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) + ns = get_ipc_ns(nsproxy->ipc_ns); + rcu_read_unlock(); + + return ns; +} + +static void ipcns_put(void *ns) +{ + return put_ipc_ns(ns); +} + +static int ipcns_install(struct nsproxy *nsproxy, void *ns) +{ + /* Ditch state from the old ipc namespace */ + exit_sem(current); + put_ipc_ns(nsproxy->ipc_ns); + nsproxy->ipc_ns = get_ipc_ns(ns); + return 0; +} + +const struct proc_ns_operations ipcns_operations = { + .name = "ipc", + .type = CLONE_NEWIPC, + .get = ipcns_get, + .put = ipcns_put, + .install = ipcns_install, +}; -- cgit v1.2.3-70-g09d2 From f12a20fc9bfba4218ecbc4e40c8e08dc2a85dc99 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 May 2011 15:44:12 -0700 Subject: procfs: add stub for proc_mkdir_mode() Provide a stub for proc_mkdir_mode() when CONFIG_PROC_FS is not enabled, just like the stub for proc_mkdir(). Fixes this linux-next build error: drivers/net/wireless/airo.c:4504: error: implicit declaration of function 'proc_mkdir_mode' Signed-off-by: Randy Dunlap Cc: Stephen Rothwell Cc: Alexey Dobriyan Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/proc_fs.h') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251..eaf4350c0f9 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,6 +208,8 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} +static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, -- cgit v1.2.3-70-g09d2 From f2beb7983613ecca20a61604f01ab50cc7a797e6 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Tue, 24 May 2011 17:12:48 -0700 Subject: proc: make struct proc_maps_private truly private Now that mm/mempolicy.c is no longer implementing /proc/pid/numa_maps there is no need to export struct proc_maps_private to the world. Move it to fs/proc/internal.h instead. Signed-off-by: Stephen Wilson Reviewed-by: KOSAKI Motohiro Cc: Hugh Dickins Cc: David Rientjes Cc: Lee Schermerhorn Cc: Alexey Dobriyan Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 8 ++++++++ include/linux/proc_fs.h | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/proc_fs.h') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5..3763b436e69 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +struct proc_maps_private { + struct pid *pid; + struct task_struct *task; +#ifdef CONFIG_MMU + struct vm_area_struct *tail_vma; +#endif +}; + void proc_init_inodecache(void); static inline struct pid *proc_pid(struct inode *inode) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index eaf4350c0f9..3686cd6c9ac 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -288,12 +288,4 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde) return pde->parent->data; } -struct proc_maps_private { - struct pid *pid; - struct task_struct *task; -#ifdef CONFIG_MMU - struct vm_area_struct *tail_vma; -#endif -}; - #endif /* _LINUX_PROC_FS_H */ -- cgit v1.2.3-70-g09d2 From 3864601387cf4196371e3c1897fdffa5228296f9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 26 May 2011 16:25:46 -0700 Subject: mm: extract exe_file handling from procfs Setup and cleanup of mm_struct->exe_file is currently done in fs/proc/. This was because exe_file was needed only for /proc//exe. Since we will need the exe_file functionality also for core dumps (so core name can contain full binary path), built this functionality always into the kernel. To achieve that move that out of proc FS to the kernel/ where in fact it should belong. By doing that we can make dup_mm_exe_file static. Also we can drop linux/proc_fs.h inclusion in fs/exec.c and kernel/fork.c. Signed-off-by: Jiri Slaby Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 - fs/proc/base.c | 51 ----------------------------------------------- include/linux/mm.h | 10 ++-------- include/linux/mm_types.h | 2 -- include/linux/proc_fs.h | 19 ------------------ kernel/fork.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 53 insertions(+), 82 deletions(-) (limited to 'include/linux/proc_fs.h') diff --git a/fs/exec.c b/fs/exec.c index 936f5776655..88a16c57228 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index dc8bca72b00..c2ac2fb123c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1576,57 +1576,6 @@ static const struct file_operations proc_pid_set_comm_operations = { .release = single_release, }; -/* - * We added or removed a vma mapping the executable. The vmas are only mapped - * during exec and are not mapped with the mmap system call. - * Callers must hold down_write() on the mm's mmap_sem for these - */ -void added_exe_file_vma(struct mm_struct *mm) -{ - mm->num_exe_file_vmas++; -} - -void removed_exe_file_vma(struct mm_struct *mm) -{ - mm->num_exe_file_vmas--; - if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ - fput(mm->exe_file); - mm->exe_file = NULL; - } - -} - -void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) -{ - if (new_exe_file) - get_file(new_exe_file); - if (mm->exe_file) - fput(mm->exe_file); - mm->exe_file = new_exe_file; - mm->num_exe_file_vmas = 0; -} - -struct file *get_mm_exe_file(struct mm_struct *mm) -{ - struct file *exe_file; - - /* We need mmap_sem to protect against races with removal of - * VM_EXECUTABLE vmas */ - down_read(&mm->mmap_sem); - exe_file = mm->exe_file; - if (exe_file) - get_file(exe_file); - up_read(&mm->mmap_sem); - return exe_file; -} - -void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) -{ - /* It's safe to write the exe_file pointer without exe_file_lock because - * this is called during fork when the task is not yet in /proc */ - newmm->exe_file = get_mm_exe_file(oldmm); -} - static int proc_exe_link(struct inode *inode, struct path *exe_path) { struct task_struct *task; diff --git a/include/linux/mm.h b/include/linux/mm.h index fb8e814f78d..9670f71d7be 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1408,17 +1408,11 @@ extern void exit_mmap(struct mm_struct *); extern int mm_take_all_locks(struct mm_struct *mm); extern void mm_drop_all_locks(struct mm_struct *mm); -#ifdef CONFIG_PROC_FS /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ extern void added_exe_file_vma(struct mm_struct *mm); extern void removed_exe_file_vma(struct mm_struct *mm); -#else -static inline void added_exe_file_vma(struct mm_struct *mm) -{} - -static inline void removed_exe_file_vma(struct mm_struct *mm) -{} -#endif /* CONFIG_PROC_FS */ +extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern struct file *get_mm_exe_file(struct mm_struct *mm); extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); extern int install_special_mapping(struct mm_struct *mm, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6fe96c19f85..2a78aae78c6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -302,11 +302,9 @@ struct mm_struct { struct task_struct __rcu *owner; #endif -#ifdef CONFIG_PROC_FS /* store ref to file /proc//exe symlink points to */ struct file *exe_file; unsigned long num_exe_file_vmas; -#endif #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 648c9c58add..e7576cf9e32 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -173,12 +173,6 @@ extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); -/* While the {get|set|dup}_mm_exe_file functions are for mm_structs, they are - * only needed to implement /proc/|self/exe so we define them here. */ -extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); -extern struct file *get_mm_exe_file(struct mm_struct *mm); -extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); - extern struct file *proc_ns_fget(int fd); #else @@ -230,19 +224,6 @@ static inline void pid_ns_release_proc(struct pid_namespace *ns) { } -static inline void set_mm_exe_file(struct mm_struct *mm, - struct file *new_exe_file) -{} - -static inline struct file *get_mm_exe_file(struct mm_struct *mm) -{ - return NULL; -} - -static inline void dup_mm_exe_file(struct mm_struct *oldmm, - struct mm_struct *newmm) -{} - static inline struct file *proc_ns_fget(int fd) { return ERR_PTR(-EINVAL); diff --git a/kernel/fork.c b/kernel/fork.c index 1f84099ecce..ca406d91671 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -597,6 +596,57 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call. + * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + /** * get_task_mm - acquire a reference to the task's mm * -- cgit v1.2.3-70-g09d2