From 0ce8974d504913a0f0ae2d97b20a5ac665431a41 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 03:13:27 -0800 Subject: sysctl: Consolidate !CONFIG_SYSCTL handling - In sysctl.h move functions only available if CONFIG_SYSCL is defined inside of #ifdef CONFIG_SYSCTL - Move the stub function definitions for !CONFIG_SYSCTL into sysctl.h and make them static inlines. Signed-off-by: Eric W. Biederman --- include/linux/sysctl.h | 95 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 33 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index bb9127dd814..cf3ee7f246d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -937,30 +937,8 @@ enum struct ctl_table; struct nsproxy; struct ctl_table_root; - -struct ctl_table_set { - struct list_head list; - struct ctl_table_set *parent; - int (*is_seen)(struct ctl_table_set *); -}; - -extern void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)); - struct ctl_table_header; -extern void sysctl_head_get(struct ctl_table_header *); -extern void sysctl_head_put(struct ctl_table_header *); -extern int sysctl_is_seen(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); -extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev); -extern void sysctl_head_finish(struct ctl_table_header *prev); -extern int sysctl_perm(struct ctl_table_root *root, - struct ctl_table *table, int op); - typedef struct ctl_table ctl_table; typedef int proc_handler (struct ctl_table *ctl, int write, @@ -1023,8 +1001,6 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) return (void *)(unsigned long)atomic_read(&poll->event); } -void proc_sys_poll_notify(struct ctl_table_poll *poll); - #define __CTL_TABLE_POLL_INITIALIZER(name) { \ .event = ATOMIC_INIT(0), \ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) } @@ -1047,15 +1023,6 @@ struct ctl_table void *extra2; }; -struct ctl_table_root { - struct list_head root_list; - struct ctl_table_set default_set; - struct ctl_table_set *(*lookup)(struct ctl_table_root *root, - struct nsproxy *namespaces); - int (*permissions)(struct ctl_table_root *root, - struct nsproxy *namespaces, struct ctl_table *table); -}; - /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ struct ctl_table_header @@ -1078,11 +1045,45 @@ struct ctl_table_header struct ctl_table_header *parent; }; +struct ctl_table_set { + struct list_head list; + struct ctl_table_set *parent; + int (*is_seen)(struct ctl_table_set *); +}; + +struct ctl_table_root { + struct list_head root_list; + struct ctl_table_set default_set; + struct ctl_table_set *(*lookup)(struct ctl_table_root *root, + struct nsproxy *namespaces); + int (*permissions)(struct ctl_table_root *root, + struct nsproxy *namespaces, struct ctl_table *table); +}; + /* struct ctl_path describes where in the hierarchy a table is added */ struct ctl_path { const char *procname; }; +#ifdef CONFIG_SYSCTL + +void proc_sys_poll_notify(struct ctl_table_poll *poll); + +extern void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)); + +extern void sysctl_head_get(struct ctl_table_header *); +extern void sysctl_head_put(struct ctl_table_header *); +extern int sysctl_is_seen(struct ctl_table_header *); +extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); +extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); +extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, + struct ctl_table_header *prev); +extern void sysctl_head_finish(struct ctl_table_header *prev); +extern int sysctl_perm(struct ctl_table_root *root, + struct ctl_table *table, int op); + void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_root *root, struct nsproxy *namespaces, @@ -1094,6 +1095,34 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); +#else /* CONFIG_SYSCTL */ +static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) +{ + return NULL; +} + +static inline struct ctl_table_header *register_sysctl_paths( + const struct ctl_path *path, struct ctl_table *table) +{ + return NULL; +} + +static inline void unregister_sysctl_table(struct ctl_table_header * table) +{ +} + +static inline void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ +} + +static inline void sysctl_head_put(struct ctl_table_header *head) +{ +} + +#endif /* CONFIG_SYSCTL */ + #endif /* __KERNEL__ */ #endif /* _LINUX_SYSCTL_H */ -- cgit v1.2.3-70-g09d2 From de4e83bd6b5e16d491ec068cd22801d5d063b07a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 03:34:20 -0800 Subject: sysctl: Register the base sysctl table like any other sysctl table. Simplify the code by treating the base sysctl table like any other sysctl table and register it with register_sysctl_table. To ensure this table is registered early enough to avoid problems call sysctl_init from proc_sys_init. Rename sysctl_net.c:sysctl_init() to net_sysctl_init() to avoid name conflicts now that kernel/sysctl.c:sysctl_init() is no longer static. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 3 ++- include/linux/sysctl.h | 1 + kernel/sysctl.c | 13 ++++--------- net/sysctl_net.c | 4 ++-- 4 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d82f4a8b4b8..9d29d28af57 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -468,5 +468,6 @@ int __init proc_sys_init(void) proc_sys_root->proc_iops = &proc_sys_dir_operations; proc_sys_root->proc_fops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; - return 0; + + return sysctl_init(); } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index cf3ee7f246d..5e3532e9599 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1095,6 +1095,7 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); +extern int sysctl_init(void); #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d5bbddd0de2..ad460248acc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -192,7 +192,7 @@ static int sysrq_sysctl_handler(ctl_table *table, int write, #endif -static struct ctl_table root_table[]; +static struct ctl_table root_table[1]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { {{.count = 1, @@ -222,7 +222,7 @@ int sysctl_legacy_va_layout; /* The default sysctl tables: */ -static struct ctl_table root_table[] = { +static struct ctl_table sysctl_base_table[] = { { .procname = "kernel", .mode = 0555, @@ -1747,17 +1747,12 @@ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) } } -static __init int sysctl_init(void) +int __init sysctl_init(void) { - sysctl_set_parent(NULL, root_table); -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - sysctl_check_table(current->nsproxy, root_table); -#endif + register_sysctl_table(sysctl_base_table); return 0; } -core_initcall(sysctl_init); - static struct ctl_table *is_branch_in(struct ctl_table *branch, struct ctl_table *table) { diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e75813904f2..a6bbee2bc71 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -90,7 +90,7 @@ static struct pernet_operations sysctl_pernet_ops = { .exit = sysctl_net_exit, }; -static __init int sysctl_init(void) +static __init int net_sysctl_init(void) { int ret; ret = register_pernet_subsys(&sysctl_pernet_ops); @@ -102,7 +102,7 @@ static __init int sysctl_init(void) out: return ret; } -subsys_initcall(sysctl_init); +subsys_initcall(net_sysctl_init); struct ctl_table_header *register_net_sysctl_table(struct net *net, const struct ctl_path *path, struct ctl_table *table) -- cgit v1.2.3-70-g09d2 From 1f87f0b52b1d6581168cb80f86746bc4df918d01 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 04:07:15 -0800 Subject: sysctl: Move the implementation into fs/proc/proc_sysctl.c Move the core sysctl code from kernel/sysctl.c and kernel/sysctl_check.c into fs/proc/proc_sysctl.c. Currently sysctl maintenance is hampered by the sysctl implementation being split across 3 files with artificial layering between them. Consolidate the entire sysctl implementation into 1 file so that it is easier to see what is going on and hopefully allowing for simpler maintenance. For functions that are now only used in fs/proc/proc_sysctl.c remove their declarations from sysctl.h and make them static in fs/proc/proc_sysctl.c Signed-off-by: Eric W. Biederman --- fs/proc/internal.h | 3 + fs/proc/proc_sysctl.c | 622 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sysctl.h | 16 -- kernel/Makefile | 1 - kernel/sysctl.c | 464 ------------------------------------ kernel/sysctl_check.c | 160 ------------- 6 files changed, 625 insertions(+), 641 deletions(-) delete mode 100644 kernel/sysctl_check.c (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 292577531ad..3b5ecd960d6 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -10,12 +10,15 @@ */ #include +struct ctl_table_header; extern struct proc_dir_entry proc_root; #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); +extern void sysctl_head_put(struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } +static inline void sysctl_head_put(struct ctl_table_header *head) { } #endif #ifdef CONFIG_NET extern int proc_net_init(void); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9d29d28af57..06e6f10ee8e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -24,6 +25,209 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) wake_up_interruptible(&poll->wait); } +static struct ctl_table root_table[1]; +static struct ctl_table_root sysctl_table_root; +static struct ctl_table_header root_table_header = { + {{.count = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, +}; +static struct ctl_table_root sysctl_table_root = { + .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), + .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), +}; + +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) +{ + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; +} + +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) +{ + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} + +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } else { + /* anything non-NULL; we'll never dereference it */ + p->unregistering = ERR_PTR(-EINVAL); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + +static void sysctl_head_get(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + head->count++; + spin_unlock(&sysctl_lock); +} + +void sysctl_head_put(struct ctl_table_header *head) +{ + spin_lock(&sysctl_lock); + if (!--head->count) + kfree_rcu(head, rcu); + spin_unlock(&sysctl_lock); +} + +static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) +{ + if (!head) + BUG(); + spin_lock(&sysctl_lock); + if (!use_table(head)) + head = ERR_PTR(-ENOENT); + spin_unlock(&sysctl_lock); + return head; +} + +static void sysctl_head_finish(struct ctl_table_header *head) +{ + if (!head) + return; + spin_lock(&sysctl_lock); + unuse_table(head); + spin_unlock(&sysctl_lock); +} + +static struct ctl_table_set * +lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = &root->default_set; + if (root->lookup) + set = root->lookup(root, namespaces); + return set; +} + +static struct list_head * +lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) +{ + struct ctl_table_set *set = lookup_header_set(root, namespaces); + return &set->list; +} + +static struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, + struct ctl_table_header *prev) +{ + struct ctl_table_root *root; + struct list_head *header_list; + struct ctl_table_header *head; + struct list_head *tmp; + + spin_lock(&sysctl_lock); + if (prev) { + head = prev; + tmp = &prev->ctl_entry; + unuse_table(prev); + goto next; + } + tmp = &root_table_header.ctl_entry; + for (;;) { + head = list_entry(tmp, struct ctl_table_header, ctl_entry); + + if (!use_table(head)) + goto next; + spin_unlock(&sysctl_lock); + return head; + next: + root = head->root; + tmp = tmp->next; + header_list = lookup_header_list(root, namespaces); + if (tmp != header_list) + continue; + + do { + root = list_entry(root->root_list.next, + struct ctl_table_root, root_list); + if (root == &sysctl_table_root) + goto out; + header_list = lookup_header_list(root, namespaces); + } while (list_empty(header_list)); + tmp = header_list->next; + } +out: + spin_unlock(&sysctl_lock); + return NULL; +} + +static struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) +{ + return __sysctl_head_next(current->nsproxy, prev); +} + +void register_sysctl_root(struct ctl_table_root *root) +{ + spin_lock(&sysctl_lock); + list_add_tail(&root->root_list, &sysctl_table_root.root_list); + spin_unlock(&sysctl_lock); +} + +/* + * sysctl_perm does NOT grant the superuser all rights automatically, because + * some sysctl variables are readonly even to root. + */ + +static int test_perm(int mode, int op) +{ + if (!current_euid()) + mode >>= 6; + else if (in_egroup_p(0)) + mode >>= 3; + if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) + return 0; + return -EACCES; +} + +static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) +{ + int mode; + + if (root->permissions) + mode = root->permissions(root, current->nsproxy, table); + else + mode = table->mode; + + return test_perm(mode, op); +} + +static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) +{ + for (; table->procname; table++) { + table->parent = parent; + if (table->child) + sysctl_set_parent(table, table->child); + } +} + + static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { @@ -435,6 +639,21 @@ static int proc_sys_delete(const struct dentry *dentry) return !!PROC_I(dentry->d_inode)->sysctl->unregistering; } +static int sysctl_is_seen(struct ctl_table_header *p) +{ + struct ctl_table_set *set = p->set; + int res; + spin_lock(&sysctl_lock); + if (p->unregistering) + res = 0; + else if (!set->is_seen) + res = 1; + else + res = set->is_seen(set); + spin_unlock(&sysctl_lock); + return res; +} + static int proc_sys_compare(const struct dentry *parent, const struct inode *pinode, const struct dentry *dentry, const struct inode *inode, @@ -460,6 +679,409 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; +static struct ctl_table *is_branch_in(struct ctl_table *branch, + struct ctl_table *table) +{ + struct ctl_table *p; + const char *s = branch->procname; + + /* branch should have named subdirectory as its first element */ + if (!s || !branch->child) + return NULL; + + /* ... and nothing else */ + if (branch[1].procname) + return NULL; + + /* table should contain subdirectory with the same name */ + for (p = table; p->procname; p++) { + if (!p->child) + continue; + if (p->procname && strcmp(p->procname, s) == 0) + return p; + } + return NULL; +} + +/* see if attaching q to p would be an improvement */ +static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +{ + struct ctl_table *to = p->ctl_table, *by = q->ctl_table; + struct ctl_table *next; + int is_better = 0; + int not_in_parent = !p->attached_by; + + while ((next = is_branch_in(by, to)) != NULL) { + if (by == q->attached_by) + is_better = 1; + if (to == p->attached_by) + not_in_parent = 1; + by = by->child; + to = next->child; + } + + if (is_better && not_in_parent) { + q->attached_by = by; + q->attached_to = to; + q->parent = p; + } +} + +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK +static int sysctl_depth(struct ctl_table *table) +{ + struct ctl_table *tmp; + int depth; + + depth = 0; + for (tmp = table; tmp->parent; tmp = tmp->parent) + depth++; + + return depth; +} + +static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) +{ + int i; + + for (i = 0; table && i < n; i++) + table = table->parent; + + return table; +} + + +static void sysctl_print_path(struct ctl_table *table) +{ + struct ctl_table *tmp; + int depth, i; + depth = sysctl_depth(table); + if (table->procname) { + for (i = depth; i >= 0; i--) { + tmp = sysctl_parent(table, i); + printk("/%s", tmp->procname?tmp->procname:""); + } + } + printk(" "); +} + +static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, + struct ctl_table *table) +{ + struct ctl_table_header *head; + struct ctl_table *ref, *test; + int depth, cur_depth; + + depth = sysctl_depth(table); + + for (head = __sysctl_head_next(namespaces, NULL); head; + head = __sysctl_head_next(namespaces, head)) { + cur_depth = depth; + ref = head->ctl_table; +repeat: + test = sysctl_parent(table, cur_depth); + for (; ref->procname; ref++) { + int match = 0; + if (cur_depth && !ref->child) + continue; + + if (test->procname && ref->procname && + (strcmp(test->procname, ref->procname) == 0)) + match++; + + if (match) { + if (cur_depth != 0) { + cur_depth--; + ref = ref->child; + goto repeat; + } + goto out; + } + } + } + ref = NULL; +out: + sysctl_head_finish(head); + return ref; +} + +static void set_fail(const char **fail, struct ctl_table *table, const char *str) +{ + if (*fail) { + printk(KERN_ERR "sysctl table check failed: "); + sysctl_print_path(table); + printk(" %s\n", *fail); + dump_stack(); + } + *fail = str; +} + +static void sysctl_check_leaf(struct nsproxy *namespaces, + struct ctl_table *table, const char **fail) +{ + struct ctl_table *ref; + + ref = sysctl_check_lookup(namespaces, table); + if (ref && (ref != table)) + set_fail(fail, table, "Sysctl already exists"); +} + +static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) +{ + int error = 0; + for (; table->procname; table++) { + const char *fail = NULL; + + if (table->parent) { + if (!table->parent->procname) + set_fail(&fail, table, "Parent without procname"); + } + if (table->child) { + if (table->data) + set_fail(&fail, table, "Directory with data?"); + if (table->maxlen) + set_fail(&fail, table, "Directory with maxlen?"); + if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) + set_fail(&fail, table, "Writable sysctl directory"); + if (table->proc_handler) + set_fail(&fail, table, "Directory with proc_handler"); + if (table->extra1) + set_fail(&fail, table, "Directory with extra1"); + if (table->extra2) + set_fail(&fail, table, "Directory with extra2"); + } else { + if ((table->proc_handler == proc_dostring) || + (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_dointvec_minmax) || + (table->proc_handler == proc_dointvec_jiffies) || + (table->proc_handler == proc_dointvec_userhz_jiffies) || + (table->proc_handler == proc_dointvec_ms_jiffies) || + (table->proc_handler == proc_doulongvec_minmax) || + (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { + if (!table->data) + set_fail(&fail, table, "No data"); + if (!table->maxlen) + set_fail(&fail, table, "No maxlen"); + } +#ifdef CONFIG_PROC_SYSCTL + if (!table->proc_handler) + set_fail(&fail, table, "No proc_handler"); +#endif + sysctl_check_leaf(namespaces, table, &fail); + } + if (table->mode > 0777) + set_fail(&fail, table, "bogus .mode"); + if (fail) { + set_fail(&fail, table, NULL); + error = -EINVAL; + } + if (table->child) + error |= sysctl_check_table(namespaces, table->child); + } + return error; +} +#endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ + +/** + * __register_sysctl_paths - register a sysctl hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * The members of the &struct ctl_table structure are used as follows: + * + * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not + * enter a sysctl file + * + * data - a pointer to data for use by proc_handler + * + * maxlen - the maximum size in bytes of the data + * + * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * + * child - a pointer to the child sysctl table if this entry is a directory, or + * %NULL. + * + * proc_handler - the text handler routine (described below) + * + * de - for internal use by the sysctl routines + * + * extra1, extra2 - extra pointers usable by the proc handler routines + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases - + * + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), + * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), + * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() + * + * It is the handler's job to read the input buffer from user memory + * and process it. The handler should return 0 on success. + * + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. + */ +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) +{ + struct ctl_table_header *header; + struct ctl_table *new, **prevp; + unsigned int n, npath; + struct ctl_table_set *set; + + /* Count the path components */ + for (npath = 0; path[npath].procname; ++npath) + ; + + /* + * For each path component, allocate a 2-element ctl_table array. + * The first array element will be filled with the sysctl entry + * for this, the second will be the sentinel (procname == 0). + * + * We allocate everything in one go so that we don't have to + * worry about freeing additional memory in unregister_sysctl_table. + */ + header = kzalloc(sizeof(struct ctl_table_header) + + (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + if (!header) + return NULL; + + new = (struct ctl_table *) (header + 1); + + /* Now connect the dots */ + prevp = &header->ctl_table; + for (n = 0; n < npath; ++n, ++path) { + /* Copy the procname */ + new->procname = path->procname; + new->mode = 0555; + + *prevp = new; + prevp = &new->child; + + new += 2; + } + *prevp = table; + header->ctl_table_arg = table; + + INIT_LIST_HEAD(&header->ctl_entry); + header->used = 0; + header->unregistering = NULL; + header->root = root; + sysctl_set_parent(NULL, header->ctl_table); + header->count = 1; +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK + if (sysctl_check_table(namespaces, header->ctl_table)) { + kfree(header); + return NULL; + } +#endif + spin_lock(&sysctl_lock); + header->set = lookup_header_set(root, namespaces); + header->attached_by = header->ctl_table; + header->attached_to = root_table; + header->parent = &root_table_header; + for (set = header->set; set; set = set->parent) { + struct ctl_table_header *p; + list_for_each_entry(p, &set->list, ctl_entry) { + if (p->unregistering) + continue; + try_attach(p, header); + } + } + header->parent->count++; + list_add_tail(&header->ctl_entry, &header->set->list); + spin_unlock(&sysctl_lock); + + return header; +} + +/** + * register_sysctl_table_path - register a sysctl table hierarchy + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + struct ctl_table *table) +{ + return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + path, table); +} +EXPORT_SYMBOL(register_sysctl_paths); + +/** + * register_sysctl_table - register a sysctl table hierarchy + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See register_sysctl_paths for more details. + */ +struct ctl_table_header *register_sysctl_table(struct ctl_table *table) +{ + static const struct ctl_path null_path[] = { {} }; + + return register_sysctl_paths(null_path, table); +} +EXPORT_SYMBOL(register_sysctl_table); + +/** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. + */ +void unregister_sysctl_table(struct ctl_table_header * header) +{ + might_sleep(); + + if (header == NULL) + return; + + spin_lock(&sysctl_lock); + start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree_rcu(header->parent, rcu); + } + if (!--header->count) + kfree_rcu(header, rcu); + spin_unlock(&sysctl_lock); +} +EXPORT_SYMBOL(unregister_sysctl_table); + +void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_set *parent, + int (*is_seen)(struct ctl_table_set *)) +{ + INIT_LIST_HEAD(&p->list); + p->parent = parent ? parent : &sysctl_table_root.default_set; + p->is_seen = is_seen; +} + + int __init proc_sys_init(void) { struct proc_dir_entry *proc_sys_root; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 5e3532e9599..08cabbfddac 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1073,17 +1073,6 @@ extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); -extern void sysctl_head_get(struct ctl_table_header *); -extern void sysctl_head_put(struct ctl_table_header *); -extern int sysctl_is_seen(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *); -extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); -extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev); -extern void sysctl_head_finish(struct ctl_table_header *prev); -extern int sysctl_perm(struct ctl_table_root *root, - struct ctl_table *table, int op); - void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_root *root, struct nsproxy *namespaces, @@ -1093,7 +1082,6 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table); void unregister_sysctl_table(struct ctl_table_header * table); -int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); extern int sysctl_init(void); #else /* CONFIG_SYSCTL */ @@ -1118,10 +1106,6 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, { } -static inline void sysctl_head_put(struct ctl_table_header *head) -{ -} - #endif /* CONFIG_SYSCTL */ #endif /* __KERNEL__ */ diff --git a/kernel/Makefile b/kernel/Makefile index 2d9de86b7e7..cb41b9547c9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -27,7 +27,6 @@ obj-y += power/ obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o -obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ad460248acc..b774909ed46 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -192,20 +192,6 @@ static int sysrq_sysctl_handler(ctl_table *table, int write, #endif -static struct ctl_table root_table[1]; -static struct ctl_table_root sysctl_table_root; -static struct ctl_table_header root_table_header = { - {{.count = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, - .root = &sysctl_table_root, - .set = &sysctl_table_root.default_set, -}; -static struct ctl_table_root sysctl_table_root = { - .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), -}; - static struct ctl_table kern_table[]; static struct ctl_table vm_table[]; static struct ctl_table fs_table[]; @@ -1559,459 +1545,12 @@ static struct ctl_table dev_table[] = { { } }; -static DEFINE_SPINLOCK(sysctl_lock); - -/* called under sysctl_lock */ -static int use_table(struct ctl_table_header *p) -{ - if (unlikely(p->unregistering)) - return 0; - p->used++; - return 1; -} - -/* called under sysctl_lock */ -static void unuse_table(struct ctl_table_header *p) -{ - if (!--p->used) - if (unlikely(p->unregistering)) - complete(p->unregistering); -} - -/* called under sysctl_lock, will reacquire if has to wait */ -static void start_unregistering(struct ctl_table_header *p) -{ - /* - * if p->used is 0, nobody will ever touch that entry again; - * we'll eliminate all paths to it before dropping sysctl_lock - */ - if (unlikely(p->used)) { - struct completion wait; - init_completion(&wait); - p->unregistering = &wait; - spin_unlock(&sysctl_lock); - wait_for_completion(&wait); - spin_lock(&sysctl_lock); - } else { - /* anything non-NULL; we'll never dereference it */ - p->unregistering = ERR_PTR(-EINVAL); - } - /* - * do not remove from the list until nobody holds it; walking the - * list in do_sysctl() relies on that. - */ - list_del_init(&p->ctl_entry); -} - -void sysctl_head_get(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - head->count++; - spin_unlock(&sysctl_lock); -} - -void sysctl_head_put(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - if (!--head->count) - kfree_rcu(head, rcu); - spin_unlock(&sysctl_lock); -} - -struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) -{ - if (!head) - BUG(); - spin_lock(&sysctl_lock); - if (!use_table(head)) - head = ERR_PTR(-ENOENT); - spin_unlock(&sysctl_lock); - return head; -} - -void sysctl_head_finish(struct ctl_table_header *head) -{ - if (!head) - return; - spin_lock(&sysctl_lock); - unuse_table(head); - spin_unlock(&sysctl_lock); -} - -static struct ctl_table_set * -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = &root->default_set; - if (root->lookup) - set = root->lookup(root, namespaces); - return set; -} - -static struct list_head * -lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = lookup_header_set(root, namespaces); - return &set->list; -} - -struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev) -{ - struct ctl_table_root *root; - struct list_head *header_list; - struct ctl_table_header *head; - struct list_head *tmp; - - spin_lock(&sysctl_lock); - if (prev) { - head = prev; - tmp = &prev->ctl_entry; - unuse_table(prev); - goto next; - } - tmp = &root_table_header.ctl_entry; - for (;;) { - head = list_entry(tmp, struct ctl_table_header, ctl_entry); - - if (!use_table(head)) - goto next; - spin_unlock(&sysctl_lock); - return head; - next: - root = head->root; - tmp = tmp->next; - header_list = lookup_header_list(root, namespaces); - if (tmp != header_list) - continue; - - do { - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - if (root == &sysctl_table_root) - goto out; - header_list = lookup_header_list(root, namespaces); - } while (list_empty(header_list)); - tmp = header_list->next; - } -out: - spin_unlock(&sysctl_lock); - return NULL; -} - -struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) -{ - return __sysctl_head_next(current->nsproxy, prev); -} - -void register_sysctl_root(struct ctl_table_root *root) -{ - spin_lock(&sysctl_lock); - list_add_tail(&root->root_list, &sysctl_table_root.root_list); - spin_unlock(&sysctl_lock); -} - -/* - * sysctl_perm does NOT grant the superuser all rights automatically, because - * some sysctl variables are readonly even to root. - */ - -static int test_perm(int mode, int op) -{ - if (!current_euid()) - mode >>= 6; - else if (in_egroup_p(0)) - mode >>= 3; - if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) - return 0; - return -EACCES; -} - -int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) -{ - int mode; - - if (root->permissions) - mode = root->permissions(root, current->nsproxy, table); - else - mode = table->mode; - - return test_perm(mode, op); -} - -static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) -{ - for (; table->procname; table++) { - table->parent = parent; - if (table->child) - sysctl_set_parent(table, table->child); - } -} - int __init sysctl_init(void) { register_sysctl_table(sysctl_base_table); return 0; } -static struct ctl_table *is_branch_in(struct ctl_table *branch, - struct ctl_table *table) -{ - struct ctl_table *p; - const char *s = branch->procname; - - /* branch should have named subdirectory as its first element */ - if (!s || !branch->child) - return NULL; - - /* ... and nothing else */ - if (branch[1].procname) - return NULL; - - /* table should contain subdirectory with the same name */ - for (p = table; p->procname; p++) { - if (!p->child) - continue; - if (p->procname && strcmp(p->procname, s) == 0) - return p; - } - return NULL; -} - -/* see if attaching q to p would be an improvement */ -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) -{ - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; - struct ctl_table *next; - int is_better = 0; - int not_in_parent = !p->attached_by; - - while ((next = is_branch_in(by, to)) != NULL) { - if (by == q->attached_by) - is_better = 1; - if (to == p->attached_by) - not_in_parent = 1; - by = by->child; - to = next->child; - } - - if (is_better && not_in_parent) { - q->attached_by = by; - q->attached_to = to; - q->parent = p; - } -} - -/** - * __register_sysctl_paths - register a sysctl hierarchy - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible - * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * The members of the &struct ctl_table structure are used as follows: - * - * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not - * enter a sysctl file - * - * data - a pointer to data for use by proc_handler - * - * maxlen - the maximum size in bytes of the data - * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) - * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. - * - * proc_handler - the text handler routine (described below) - * - * de - for internal use by the sysctl routines - * - * extra1, extra2 - extra pointers usable by the proc handler routines - * - * Leaf nodes in the sysctl tree will be represented by a single file - * under /proc; non-leaf nodes will be represented by directories. - * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - - * - * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), - * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), - * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() - * - * It is the handler's job to read the input buffer from user memory - * and process it. The handler should return 0 on success. - * - * This routine returns %NULL on a failure to register, and a pointer - * to the table header on success. - */ -struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, - struct nsproxy *namespaces, - const struct ctl_path *path, struct ctl_table *table) -{ - struct ctl_table_header *header; - struct ctl_table *new, **prevp; - unsigned int n, npath; - struct ctl_table_set *set; - - /* Count the path components */ - for (npath = 0; path[npath].procname; ++npath) - ; - - /* - * For each path component, allocate a 2-element ctl_table array. - * The first array element will be filled with the sysctl entry - * for this, the second will be the sentinel (procname == 0). - * - * We allocate everything in one go so that we don't have to - * worry about freeing additional memory in unregister_sysctl_table. - */ - header = kzalloc(sizeof(struct ctl_table_header) + - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); - if (!header) - return NULL; - - new = (struct ctl_table *) (header + 1); - - /* Now connect the dots */ - prevp = &header->ctl_table; - for (n = 0; n < npath; ++n, ++path) { - /* Copy the procname */ - new->procname = path->procname; - new->mode = 0555; - - *prevp = new; - prevp = &new->child; - - new += 2; - } - *prevp = table; - header->ctl_table_arg = table; - - INIT_LIST_HEAD(&header->ctl_entry); - header->used = 0; - header->unregistering = NULL; - header->root = root; - sysctl_set_parent(NULL, header->ctl_table); - header->count = 1; -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - if (sysctl_check_table(namespaces, header->ctl_table)) { - kfree(header); - return NULL; - } -#endif - spin_lock(&sysctl_lock); - header->set = lookup_header_set(root, namespaces); - header->attached_by = header->ctl_table; - header->attached_to = root_table; - header->parent = &root_table_header; - for (set = header->set; set; set = set->parent) { - struct ctl_table_header *p; - list_for_each_entry(p, &set->list, ctl_entry) { - if (p->unregistering) - continue; - try_attach(p, header); - } - } - header->parent->count++; - list_add_tail(&header->ctl_entry, &header->set->list); - spin_unlock(&sysctl_lock); - - return header; -} - -/** - * register_sysctl_table_path - register a sysctl table hierarchy - * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * See __register_sysctl_paths for more details. - */ -struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, - struct ctl_table *table) -{ - return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, - path, table); -} - -/** - * register_sysctl_table - register a sysctl table hierarchy - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * See register_sysctl_paths for more details. - */ -struct ctl_table_header *register_sysctl_table(struct ctl_table *table) -{ - static const struct ctl_path null_path[] = { {} }; - - return register_sysctl_paths(null_path, table); -} - -/** - * unregister_sysctl_table - unregister a sysctl table hierarchy - * @header: the header returned from register_sysctl_table - * - * Unregisters the sysctl table and all children. proc entries may not - * actually be removed until they are no longer used by anyone. - */ -void unregister_sysctl_table(struct ctl_table_header * header) -{ - might_sleep(); - - if (header == NULL) - return; - - spin_lock(&sysctl_lock); - start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } - if (!--header->count) - kfree_rcu(header, rcu); - spin_unlock(&sysctl_lock); -} - -int sysctl_is_seen(struct ctl_table_header *p) -{ - struct ctl_table_set *set = p->set; - int res; - spin_lock(&sysctl_lock); - if (p->unregistering) - res = 0; - else if (!set->is_seen) - res = 1; - else - res = set->is_seen(set); - spin_unlock(&sysctl_lock); - return res; -} - -void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)) -{ - INIT_LIST_HEAD(&p->list); - p->parent = parent ? parent : &sysctl_table_root.default_set; - p->is_seen = is_seen; -} - #endif /* CONFIG_SYSCTL */ /* @@ -2977,6 +2516,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); -EXPORT_SYMBOL(register_sysctl_table); -EXPORT_SYMBOL(register_sysctl_paths); -EXPORT_SYMBOL(unregister_sysctl_table); diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c deleted file mode 100644 index 362da653813..00000000000 --- a/kernel/sysctl_check.c +++ /dev/null @@ -1,160 +0,0 @@ -#include -#include -#include "../fs/xfs/xfs_sysctl.h" -#include -#include -#include - - -static int sysctl_depth(struct ctl_table *table) -{ - struct ctl_table *tmp; - int depth; - - depth = 0; - for (tmp = table; tmp->parent; tmp = tmp->parent) - depth++; - - return depth; -} - -static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) -{ - int i; - - for (i = 0; table && i < n; i++) - table = table->parent; - - return table; -} - - -static void sysctl_print_path(struct ctl_table *table) -{ - struct ctl_table *tmp; - int depth, i; - depth = sysctl_depth(table); - if (table->procname) { - for (i = depth; i >= 0; i--) { - tmp = sysctl_parent(table, i); - printk("/%s", tmp->procname?tmp->procname:""); - } - } - printk(" "); -} - -static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, - struct ctl_table *table) -{ - struct ctl_table_header *head; - struct ctl_table *ref, *test; - int depth, cur_depth; - - depth = sysctl_depth(table); - - for (head = __sysctl_head_next(namespaces, NULL); head; - head = __sysctl_head_next(namespaces, head)) { - cur_depth = depth; - ref = head->ctl_table; -repeat: - test = sysctl_parent(table, cur_depth); - for (; ref->procname; ref++) { - int match = 0; - if (cur_depth && !ref->child) - continue; - - if (test->procname && ref->procname && - (strcmp(test->procname, ref->procname) == 0)) - match++; - - if (match) { - if (cur_depth != 0) { - cur_depth--; - ref = ref->child; - goto repeat; - } - goto out; - } - } - } - ref = NULL; -out: - sysctl_head_finish(head); - return ref; -} - -static void set_fail(const char **fail, struct ctl_table *table, const char *str) -{ - if (*fail) { - printk(KERN_ERR "sysctl table check failed: "); - sysctl_print_path(table); - printk(" %s\n", *fail); - dump_stack(); - } - *fail = str; -} - -static void sysctl_check_leaf(struct nsproxy *namespaces, - struct ctl_table *table, const char **fail) -{ - struct ctl_table *ref; - - ref = sysctl_check_lookup(namespaces, table); - if (ref && (ref != table)) - set_fail(fail, table, "Sysctl already exists"); -} - -int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) -{ - int error = 0; - for (; table->procname; table++) { - const char *fail = NULL; - - if (table->parent) { - if (!table->parent->procname) - set_fail(&fail, table, "Parent without procname"); - } - if (table->child) { - if (table->data) - set_fail(&fail, table, "Directory with data?"); - if (table->maxlen) - set_fail(&fail, table, "Directory with maxlen?"); - if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) - set_fail(&fail, table, "Writable sysctl directory"); - if (table->proc_handler) - set_fail(&fail, table, "Directory with proc_handler"); - if (table->extra1) - set_fail(&fail, table, "Directory with extra1"); - if (table->extra2) - set_fail(&fail, table, "Directory with extra2"); - } else { - if ((table->proc_handler == proc_dostring) || - (table->proc_handler == proc_dointvec) || - (table->proc_handler == proc_dointvec_minmax) || - (table->proc_handler == proc_dointvec_jiffies) || - (table->proc_handler == proc_dointvec_userhz_jiffies) || - (table->proc_handler == proc_dointvec_ms_jiffies) || - (table->proc_handler == proc_doulongvec_minmax) || - (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { - if (!table->data) - set_fail(&fail, table, "No data"); - if (!table->maxlen) - set_fail(&fail, table, "No maxlen"); - } -#ifdef CONFIG_PROC_SYSCTL - if (!table->proc_handler) - set_fail(&fail, table, "No proc_handler"); -#endif - sysctl_check_leaf(namespaces, table, &fail); - } - if (table->mode > 0777) - set_fail(&fail, table, "bogus .mode"); - if (fail) { - set_fail(&fail, table, NULL); - error = -EINVAL; - } - if (table->child) - error |= sysctl_check_table(namespaces, table->child); - } - return error; -} -- cgit v1.2.3-70-g09d2 From 97324cd804b7b9fb6044e114329335db79810425 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 22:19:13 -0800 Subject: sysctl: Implement retire_sysctl_set This adds a small helper retire_sysctl_set to remove the intimate knowledge about the how a sysctl_set is implemented from net/sysct_net.c Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 4 ++++ include/linux/sysctl.h | 1 + net/sysctl_net.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f6aa75111b4..9d8223cd365 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1080,6 +1080,10 @@ void setup_sysctl_set(struct ctl_table_set *p, p->is_seen = is_seen; } +void retire_sysctl_set(struct ctl_table_set *set) +{ + WARN_ON(!list_empty(&set->list)); +} int __init proc_sys_init(void) { diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 08cabbfddac..475ff0e35e6 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1072,6 +1072,7 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); +extern void retire_sysctl_set(struct ctl_table_set *set); void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_paths( diff --git a/net/sysctl_net.c b/net/sysctl_net.c index a6bbee2bc71..ffd67a6515a 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -82,7 +82,7 @@ static int __net_init sysctl_net_init(struct net *net) static void __net_exit sysctl_net_exit(struct net *net) { - WARN_ON(!list_empty(&net->sysctls.list)); + retire_sysctl_set(&net->sysctls); } static struct pernet_operations sysctl_pernet_ops = { -- cgit v1.2.3-70-g09d2 From bd295b56cfae85f2dd6c2b03951480c91e6d08f3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 21:10:21 -0800 Subject: sysctl: Remove the unnecessary sysctl_set parent concept. In sysctl_net register the two networking roots in the proper order. In register_sysctl walk the sysctl sets in the reverse order of the sysctl roots. Remove parent from ctl_table_set and setup_sysctl_set as it is no longer needed. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 11 ++++++++--- include/linux/sysctl.h | 3 --- net/sysctl_net.c | 5 ++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9d8223cd365..86d32a318e2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -995,13 +995,20 @@ struct ctl_table_header *__register_sysctl_paths( header->attached_by = header->ctl_table; header->attached_to = root_table; header->parent = &root_table_header; - for (set = header->set; set; set = set->parent) { + set = header->set; + root = header->root; + for (;;) { struct ctl_table_header *p; list_for_each_entry(p, &set->list, ctl_entry) { if (p->unregistering) continue; try_attach(p, header); } + if (root == &sysctl_table_root) + break; + root = list_entry(root->root_list.prev, + struct ctl_table_root, root_list); + set = lookup_header_set(root, namespaces); } header->parent->count++; list_add_tail(&header->ctl_entry, &header->set->list); @@ -1072,11 +1079,9 @@ void unregister_sysctl_table(struct ctl_table_header * header) EXPORT_SYMBOL(unregister_sysctl_table); void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) { INIT_LIST_HEAD(&p->list); - p->parent = parent ? parent : &sysctl_table_root.default_set; p->is_seen = is_seen; } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 475ff0e35e6..43c36acdb62 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1047,7 +1047,6 @@ struct ctl_table_header struct ctl_table_set { struct list_head list; - struct ctl_table_set *parent; int (*is_seen)(struct ctl_table_set *); }; @@ -1070,7 +1069,6 @@ struct ctl_path { void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); @@ -1102,7 +1100,6 @@ static inline void unregister_sysctl_table(struct ctl_table_header * table) } static inline void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, int (*is_seen)(struct ctl_table_set *)) { } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index ffd67a6515a..07c6b879c8b 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -75,7 +75,6 @@ static struct ctl_table_root net_sysctl_ro_root = { static int __net_init sysctl_net_init(struct net *net) { setup_sysctl_set(&net->sysctls, - &net_sysctl_ro_root.default_set, is_seen); return 0; } @@ -96,9 +95,9 @@ static __init int net_sysctl_init(void) ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out; - register_sysctl_root(&net_sysctl_root); - setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL, NULL); + setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL); register_sysctl_root(&net_sysctl_ro_root); + register_sysctl_root(&net_sysctl_root); out: return ret; } -- cgit v1.2.3-70-g09d2 From 6e9d5164153ad6539edd31e7afb02a3e79124cad Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 21 Jan 2012 10:26:26 -0800 Subject: sysctl: Add support for register sysctl tables with a normal cstring path. Make __register_sysctl_table the core sysctl registration operation and make it take a char * string as path. Now that binary paths have been banished into the real of backwards compatibility in kernel/binary_sysctl.c where they can be safely ignored there is no longer a need to use struct ctl_path to represent path names when registering ctl_tables. Start the transition to using normal char * strings to represent pathnames when registering sysctl tables. Normal strings are easier to deal with both in the internal sysctl implementation and for programmers registering sysctl tables. __register_sysctl_paths is turned into a backwards compatibility wrapper that converts a ctl_path array into a normal char * string. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 94 ++++++++++++++++++++++++++++++++++++++++++++------ include/linux/sysctl.h | 3 ++ 2 files changed, 87 insertions(+), 10 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index bcf60fb8dce..5704ff0e889 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -882,7 +882,7 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** - * __register_sysctl_paths - register a sysctl hierarchy + * __register_sysctl_table - register a sysctl table * @root: List of sysctl headers to register on * @namespaces: Data to compute which lists of sysctl entries are visible * @path: The path to the directory the sysctl table is in. @@ -934,21 +934,34 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. */ -struct ctl_table_header *__register_sysctl_paths( +struct ctl_table_header *__register_sysctl_table( struct ctl_table_root *root, struct nsproxy *namespaces, - const struct ctl_path *path, struct ctl_table *table) + const char *path, struct ctl_table *table) { struct ctl_table_header *header; struct ctl_table *new, **prevp; - unsigned int n, npath; + const char *name, *nextname; + unsigned int npath = 0; struct ctl_table_set *set; size_t path_bytes = 0; char *new_name; /* Count the path components */ - for (npath = 0; path[npath].procname; ++npath) - path_bytes += strlen(path[npath].procname) + 1; + for (name = path; name; name = nextname) { + int namelen; + nextname = strchr(name, '/'); + if (nextname) { + namelen = nextname - name; + nextname++; + } else { + namelen = strlen(name); + } + if (namelen == 0) + continue; + path_bytes += namelen + 1; + npath++; + } /* * For each path component, allocate a 2-element ctl_table array. @@ -968,9 +981,20 @@ struct ctl_table_header *__register_sysctl_paths( /* Now connect the dots */ prevp = &header->ctl_table; - for (n = 0; n < npath; ++n, ++path) { - /* Copy the procname */ - strcpy(new_name, path->procname); + for (name = path; name; name = nextname) { + int namelen; + nextname = strchr(name, '/'); + if (nextname) { + namelen = nextname - name; + nextname++; + } else { + namelen = strlen(name); + } + if (namelen == 0) + continue; + memcpy(new_name, name, namelen); + new_name[namelen] = '\0'; + new->procname = new_name; new->mode = 0555; @@ -978,7 +1002,7 @@ struct ctl_table_header *__register_sysctl_paths( prevp = &new->child; new += 2; - new_name += strlen(new_name) + 1; + new_name += namelen + 1; } *prevp = table; header->ctl_table_arg = table; @@ -1022,6 +1046,56 @@ struct ctl_table_header *__register_sysctl_paths( return header; } +static char *append_path(const char *path, char *pos, const char *name) +{ + int namelen; + namelen = strlen(name); + if (((pos - path) + namelen + 2) >= PATH_MAX) + return NULL; + memcpy(pos, name, namelen); + pos[namelen] = '/'; + pos[namelen + 1] = '\0'; + pos += namelen + 1; + return pos; +} + +/** + * __register_sysctl_paths - register a sysctl table hierarchy + * @root: List of sysctl headers to register on + * @namespaces: Data to compute which lists of sysctl entries are visible + * @path: The path to the directory the sysctl table is in. + * @table: the top-level table structure + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_table for more details. + */ +struct ctl_table_header *__register_sysctl_paths( + struct ctl_table_root *root, + struct nsproxy *namespaces, + const struct ctl_path *path, struct ctl_table *table) +{ + struct ctl_table_header *header = NULL; + const struct ctl_path *component; + char *new_path, *pos; + + pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!new_path) + return NULL; + + pos[0] = '\0'; + for (component = path; component->procname; component++) { + pos = append_path(new_path, pos, component->procname); + if (!pos) + goto out; + } + header = __register_sysctl_table(root, namespaces, new_path, table); +out: + kfree(new_path); + return header; +} + /** * register_sysctl_table_path - register a sysctl table hierarchy * @path: The path to the directory the sysctl table is in. diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 43c36acdb62..a514e0f6056 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1073,6 +1073,9 @@ extern void setup_sysctl_set(struct ctl_table_set *p, extern void retire_sysctl_set(struct ctl_table_set *set); void register_sysctl_root(struct ctl_table_root *root); +struct ctl_table_header *__register_sysctl_table( + struct ctl_table_root *root, struct nsproxy *namespaces, + const char *path, struct ctl_table *table); struct ctl_table_header *__register_sysctl_paths( struct ctl_table_root *root, struct nsproxy *namespaces, const struct ctl_path *path, struct ctl_table *table); -- cgit v1.2.3-70-g09d2 From f728019bb72e655680c02ad1829323054a8e875f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 18:22:05 -0800 Subject: sysctl: register only tables of sysctl files Split the registration of a complex ctl_table array which may have arbitrary numbers of directories (->child != NULL) and tables of files into a series of simpler registrations that only register tables of files. Graphically: register('dir', { + file-a + file-b + subdir1 + file-c + subdir2 + file-d + file-e }) is transformed into: wrapper->subheaders[0] = register('dir', {file1-a, file1-b}) wrapper->subheaders[1] = register('dir/subdir1', {file-c}) wrapper->subheaders[2] = register('dir/subdir2', {file-d, file-e}) return wrapper This guarantees that __register_sysctl_table will only see a simple ctl_table array with all entries having (->child == NULL). Care was taken to pass the original simple ctl_table arrays to __register_sysctl_table whenever possible. This change is derived from a similar patch written by Lucrian Grijincu. Inspired-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 165 +++++++++++++++++++++++++++++++++++++++++++------ include/linux/sysctl.h | 2 +- 2 files changed, 148 insertions(+), 19 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b91deeeb56..6bab2ae9e39 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -882,7 +882,7 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ /** - * __register_sysctl_table - register a sysctl table + * __register_sysctl_table - register a leaf sysctl table * @root: List of sysctl headers to register on * @namespaces: Data to compute which lists of sysctl entries are visible * @path: The path to the directory the sysctl table is in. @@ -900,29 +900,19 @@ static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *tabl * * maxlen - the maximum size in bytes of the data * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * mode - the file permissions for the /proc/sys file * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. + * child - must be %NULL. * * proc_handler - the text handler routine (described below) * - * de - for internal use by the sysctl routines - * * extra1, extra2 - extra pointers usable by the proc handler routines * * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - + * There must be a proc_handler routine for any terminal nodes. + * Several default handlers are available to cover common cases - * * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), @@ -1059,6 +1049,100 @@ static char *append_path(const char *path, char *pos, const char *name) return pos; } +static int count_subheaders(struct ctl_table *table) +{ + int has_files = 0; + int nr_subheaders = 0; + struct ctl_table *entry; + + /* special case: no directory and empty directory */ + if (!table || !table->procname) + return 1; + + for (entry = table; entry->procname; entry++) { + if (entry->child) + nr_subheaders += count_subheaders(entry->child); + else + has_files = 1; + } + return nr_subheaders + has_files; +} + +static int register_leaf_sysctl_tables(const char *path, char *pos, + struct ctl_table_header ***subheader, + struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table *table) +{ + struct ctl_table *ctl_table_arg = NULL; + struct ctl_table *entry, *files; + int nr_files = 0; + int nr_dirs = 0; + int err = -ENOMEM; + + for (entry = table; entry->procname; entry++) { + if (entry->child) + nr_dirs++; + else + nr_files++; + } + + files = table; + /* If there are mixed files and directories we need a new table */ + if (nr_dirs && nr_files) { + struct ctl_table *new; + files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), + GFP_KERNEL); + if (!files) + goto out; + + ctl_table_arg = files; + for (new = files, entry = table; entry->procname; entry++) { + if (entry->child) + continue; + *new = *entry; + new++; + } + } + + /* Register everything except a directory full of subdirectories */ + if (nr_files || !nr_dirs) { + struct ctl_table_header *header; + header = __register_sysctl_table(root, namespaces, path, files); + if (!header) { + kfree(ctl_table_arg); + goto out; + } + + /* Remember if we need to free the file table */ + header->ctl_table_arg = ctl_table_arg; + **subheader = header; + (*subheader)++; + } + + /* Recurse into the subdirectories. */ + for (entry = table; entry->procname; entry++) { + char *child_pos; + + if (!entry->child) + continue; + + err = -ENAMETOOLONG; + child_pos = append_path(path, pos, entry->procname); + if (!child_pos) + goto out; + + err = register_leaf_sysctl_tables(path, child_pos, subheader, + root, namespaces, entry->child); + pos[0] = '\0'; + if (err) + goto out; + } + err = 0; +out: + /* On failure our caller will unregister all registered subheaders */ + return err; +} + /** * __register_sysctl_paths - register a sysctl table hierarchy * @root: List of sysctl headers to register on @@ -1077,7 +1161,8 @@ struct ctl_table_header *__register_sysctl_paths( const struct ctl_path *path, struct ctl_table *table) { struct ctl_table *ctl_table_arg = table; - struct ctl_table_header *header = NULL; + int nr_subheaders = count_subheaders(table); + struct ctl_table_header *header = NULL, **subheaders, **subheader; const struct ctl_path *component; char *new_path, *pos; @@ -1097,12 +1182,39 @@ struct ctl_table_header *__register_sysctl_paths( goto out; table = table->child; } - header = __register_sysctl_table(root, namespaces, new_path, table); - if (header) + if (nr_subheaders == 1) { + header = __register_sysctl_table(root, namespaces, new_path, table); + if (header) + header->ctl_table_arg = ctl_table_arg; + } else { + header = kzalloc(sizeof(*header) + + sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); + if (!header) + goto out; + + subheaders = (struct ctl_table_header **) (header + 1); + subheader = subheaders; header->ctl_table_arg = ctl_table_arg; + + if (register_leaf_sysctl_tables(new_path, pos, &subheader, + root, namespaces, table)) + goto err_register_leaves; + } + out: kfree(new_path); return header; + +err_register_leaves: + while (subheader > subheaders) { + struct ctl_table_header *subh = *(--subheader); + struct ctl_table *table = subh->ctl_table_arg; + unregister_sysctl_table(subh); + kfree(table); + } + kfree(header); + header = NULL; + goto out; } /** @@ -1149,11 +1261,28 @@ EXPORT_SYMBOL(register_sysctl_table); */ void unregister_sysctl_table(struct ctl_table_header * header) { + int nr_subheaders; might_sleep(); if (header == NULL) return; + nr_subheaders = count_subheaders(header->ctl_table_arg); + if (unlikely(nr_subheaders > 1)) { + struct ctl_table_header **subheaders; + int i; + + subheaders = (struct ctl_table_header **)(header + 1); + for (i = nr_subheaders -1; i >= 0; i--) { + struct ctl_table_header *subh = subheaders[i]; + struct ctl_table *table = subh->ctl_table_arg; + unregister_sysctl_table(subh); + kfree(table); + } + kfree(header); + return; + } + spin_lock(&sysctl_lock); start_unregistering(header); if (!--header->parent->count) { diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a514e0f6056..25c7dfe1a7e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1015,7 +1015,7 @@ struct ctl_table void *data; int maxlen; umode_t mode; - struct ctl_table *child; + struct ctl_table *child; /* Deprecated */ struct ctl_table *parent; /* Automatically set */ proc_handler *proc_handler; /* Callback for text formatting */ struct ctl_table_poll *poll; -- cgit v1.2.3-70-g09d2 From 8d6ecfcc014332fd2fe933f64194160f0e3a6696 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 6 Jan 2012 11:55:30 -0800 Subject: sysctl: Remove the now unused ctl_table parent field. While useful at one time for selinux and the sysctl sanity checks those users no longer use the parent field and we can safely remove it. Inspired-by: Lucian Adrian Grijincu Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 12 +----------- include/linux/sysctl.h | 1 - 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a492ff60e07..e573f9b4f22 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -218,16 +218,6 @@ static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int return test_perm(mode, op); } -static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) -{ - for (; table->procname; table++) { - table->parent = parent; - if (table->child) - sysctl_set_parent(table, table->child); - } -} - - static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { @@ -947,10 +937,10 @@ struct ctl_table_header *__register_sysctl_table( header->used = 0; header->unregistering = NULL; header->root = root; - sysctl_set_parent(NULL, header->ctl_table); header->count = 1; if (sysctl_check_table(path, table)) goto fail; + spin_lock(&sysctl_lock); header->set = lookup_header_set(root, namespaces); header->attached_by = header->ctl_table; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 25c7dfe1a7e..094bc5ccf1e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1016,7 +1016,6 @@ struct ctl_table int maxlen; umode_t mode; struct ctl_table *child; /* Deprecated */ - struct ctl_table *parent; /* Automatically set */ proc_handler *proc_handler; /* Callback for text formatting */ struct ctl_table_poll *poll; void *extra1; -- cgit v1.2.3-70-g09d2 From 938aaa4f9249aa1519fd0db07fc72125de2df338 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:24:30 -0800 Subject: sysctl: Initial support for auto-unregistering sysctl tables. Add nreg to ctl_table_header. When nreg drops to 0 the ctl_table_header will be unregistered. Factor out drop_sysctl_table from unregister_sysctl_table, and add the logic for decrementing nreg. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 28 +++++++++++++++++++--------- include/linux/sysctl.h | 1 + 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 15444850b3e..13faa48c467 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -29,8 +29,9 @@ static struct ctl_table root_table[1]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { {{.count = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .nreg = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }; @@ -938,6 +939,7 @@ struct ctl_table_header *__register_sysctl_table( header->unregistering = NULL; header->root = root; header->count = 1; + header->nreg = 1; if (sysctl_check_table(path, table)) goto fail; @@ -1192,6 +1194,20 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table) } EXPORT_SYMBOL(register_sysctl_table); +static void drop_sysctl_table(struct ctl_table_header *header) +{ + if (--header->nreg) + return; + + start_unregistering(header); + if (!--header->parent->count) { + WARN_ON(1); + kfree_rcu(header->parent, rcu); + } + if (!--header->count) + kfree_rcu(header, rcu); +} + /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl_table @@ -1224,13 +1240,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) } spin_lock(&sysctl_lock); - start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } - if (!--header->count) - kfree_rcu(header, rcu); + drop_sysctl_table(header); spin_unlock(&sysctl_lock); } EXPORT_SYMBOL(unregister_sysctl_table); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 094bc5ccf1e..e40b8f6e5d0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1032,6 +1032,7 @@ struct ctl_table_header struct list_head ctl_entry; int used; int count; + int nreg; }; struct rcu_head rcu; }; -- cgit v1.2.3-70-g09d2 From 9eb47c26f09e27506d343ef52e634b2a50ee21ef Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Jan 2012 21:26:00 -0800 Subject: sysctl: Add a root pointer to ctl_table_set Add a ctl_table_root pointer to ctl_table set so it is easy to go from a ctl_table_set to a ctl_table_root. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 3 +++ include/linux/sysctl.h | 3 +++ net/sysctl_net.c | 5 ++--- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d9c3ae6afe4..65c13dddcea 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -45,6 +45,7 @@ static struct ctl_table_header root_table_header = { static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.root = &sysctl_table_root, }; static DEFINE_SPINLOCK(sysctl_lock); @@ -1348,9 +1349,11 @@ void unregister_sysctl_table(struct ctl_table_header * header) EXPORT_SYMBOL(unregister_sysctl_table); void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { INIT_LIST_HEAD(&p->list); + p->root = root; p->is_seen = is_seen; } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e40b8f6e5d0..e73ba33cbf0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1047,6 +1047,7 @@ struct ctl_table_header struct ctl_table_set { struct list_head list; + struct ctl_table_root *root; int (*is_seen)(struct ctl_table_set *); }; @@ -1069,6 +1070,7 @@ struct ctl_path { void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); @@ -1103,6 +1105,7 @@ static inline void unregister_sysctl_table(struct ctl_table_header * table) } static inline void setup_sysctl_set(struct ctl_table_set *p, + struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 07c6b879c8b..e998c644804 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -74,8 +74,7 @@ static struct ctl_table_root net_sysctl_ro_root = { static int __net_init sysctl_net_init(struct net *net) { - setup_sysctl_set(&net->sysctls, - is_seen); + setup_sysctl_set(&net->sysctls, &net_sysctl_root, is_seen); return 0; } @@ -95,7 +94,7 @@ static __init int net_sysctl_init(void) ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out; - setup_sysctl_set(&net_sysctl_ro_root.default_set, NULL); + setup_sysctl_set(&net_sysctl_ro_root.default_set, &net_sysctl_ro_root, NULL); register_sysctl_root(&net_sysctl_ro_root); register_sysctl_root(&net_sysctl_root); out: -- cgit v1.2.3-70-g09d2 From 7ec66d06362da7684a4948c4c2bf1f8546425df4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Dec 2011 08:24:29 -0800 Subject: sysctl: Stop requiring explicit management of sysctl directories Simplify the code and the sysctl semantics by autogenerating sysctl directories when a sysctl table is registered that needs the directories and autodeleting the directories when there are no more sysctl tables registered that need them. Autogenerating directories keeps sysctl tables from depending on each other, removing all of the arcane register/unregister ordering constraints and makes it impossible to get the order wrong when reigsering and unregistering sysctl tables. Autogenerating directories yields one unique entity that dentries can point to, retaining the current effective use of the dcache. Add struct ctl_dir as the type of these new autogenerated directories. The attached_by and attached_to fields in ctl_table_header are removed as they are no longer needed. The child field in ctl_table is no longer needed by the core of the sysctl code. ctl_table.child can be removed once all of the existing users have been updated. Benchmark before: make-dummies 0 999 -> 0.7s rmmod dummy -> 0.07s make-dummies 0 9999 -> 1m10s rmmod dummy -> 0.4s Benchmark after: make-dummies 0 999 -> 0.44s rmmod dummy -> 0.065s make-dummies 0 9999 -> 1m36s rmmod dummy -> 0.4s Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 342 +++++++++++++++++++++---------------------------- include/linux/sysctl.h | 10 +- 2 files changed, 150 insertions(+), 202 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 65c13dddcea..3c0767d5a55 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -28,28 +28,31 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) static struct ctl_table root_table[] = { { .procname = "", - .mode = S_IRUGO|S_IXUGO, - .child = &root_table[1], + .mode = S_IFDIR|S_IRUGO|S_IXUGO, }, { } }; static struct ctl_table_root sysctl_table_root; -static struct ctl_table_header root_table_header = { - {{.count = 1, - .nreg = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, - .root = &sysctl_table_root, - .set = &sysctl_table_root.default_set, +static struct ctl_dir sysctl_root_dir = { + .header = { + {{.count = 1, + .nreg = 1, + .ctl_table = root_table, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .root = &sysctl_table_root, + .set = &sysctl_table_root.default_set, + }, }; static struct ctl_table_root sysctl_table_root = { .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), + .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry), .default_set.root = &sysctl_table_root, }; static DEFINE_SPINLOCK(sysctl_lock); +static void drop_sysctl_table(struct ctl_table_header *header); + static int namecmp(const char *name1, int len1, const char *name2, int len2) { int minlen; @@ -66,29 +69,18 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) } static struct ctl_table *find_entry(struct ctl_table_header **phead, - struct ctl_table_set *set, - struct ctl_table_header *dir_head, struct ctl_table *dir, + struct ctl_table_set *set, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - if (dir_head->set == set) { - for (entry = dir; entry->procname; entry++) { - const char *procname = entry->procname; - if (namecmp(procname, strlen(procname), name, namelen) == 0) { - *phead = dir_head; - return entry; - } - } - } - list_for_each_entry(head, &set->list, ctl_entry) { if (head->unregistering) continue; - if (head->attached_to != dir) + if (head->parent != dir) continue; - for (entry = head->attached_by; entry->procname; entry++) { + for (entry = head->ctl_table; entry->procname; entry++) { const char *procname = entry->procname; if (namecmp(procname, strlen(procname), name, namelen) == 0) { *phead = head; @@ -103,6 +95,7 @@ static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, struct ctl_table *table) { + head->ctl_table = table; head->ctl_table_arg = table; INIT_LIST_HEAD(&head->ctl_entry); head->used = 0; @@ -119,9 +112,10 @@ static void erase_header(struct ctl_table_header *head) list_del_init(&head->ctl_entry); } -static void insert_header(struct ctl_table_header *header) +static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { - header->parent->count++; + header->parent = dir; + header->parent->header.nreg++; list_add_tail(&header->ctl_entry, &header->set->list); } @@ -219,8 +213,7 @@ lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) } static struct ctl_table *lookup_entry(struct ctl_table_header **phead, - struct ctl_table_header *dir_head, - struct ctl_table *dir, + struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; @@ -232,7 +225,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, root = &sysctl_table_root; do { set = lookup_header_set(root, current->nsproxy); - entry = find_entry(&head, set, dir_head, dir, name, namelen); + entry = find_entry(&head, set, dir, name, namelen); if (entry && use_table(head)) *phead = head; else @@ -244,7 +237,7 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, +static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, struct ctl_table_root *root, struct list_head *tmp) { struct nsproxy *namespaces = current->nsproxy; @@ -256,8 +249,8 @@ static struct ctl_table_header *next_usable_entry(struct ctl_table *dir, head = list_entry(tmp, struct ctl_table_header, ctl_entry); root = head->root; - if (head->attached_to != dir || - !head->attached_by->procname || + if (head->parent != dir || + !head->ctl_table->procname || !use_table(head)) goto next; @@ -281,47 +274,35 @@ out: return NULL; } -static void first_entry( - struct ctl_table_header *dir_head, struct ctl_table *dir, +static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, struct ctl_table **pentry) { - struct ctl_table_header *head = dir_head; - struct ctl_table *entry = dir; + struct ctl_table_header *head; + struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - if (entry->procname) { - use_table(head); - } else { - head = next_usable_entry(dir, &sysctl_table_root, - &sysctl_table_root.default_set.list); - if (head) - entry = head->attached_by; - } + head = next_usable_entry(dir, &sysctl_table_root, + &sysctl_table_root.default_set.list); spin_unlock(&sysctl_lock); + if (head) + entry = head->ctl_table; *phead = head; *pentry = entry; } -static void next_entry(struct ctl_table *dir, - struct ctl_table_header **phead, struct ctl_table **pentry) +static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) { struct ctl_table_header *head = *phead; struct ctl_table *entry = *pentry; entry++; if (!entry->procname) { - struct ctl_table_root *root = head->root; - struct list_head *tmp = &head->ctl_entry; - if (head->attached_to != dir) { - root = &sysctl_table_root; - tmp = &sysctl_table_root.default_set.list; - } spin_lock(&sysctl_lock); unuse_table(head); - head = next_usable_entry(dir, root, tmp); + head = next_usable_entry(head->parent, head->root, &head->ctl_entry); spin_unlock(&sysctl_lock); if (head) - entry = head->attached_by; + entry = head->ctl_table; } *phead = head; *pentry = entry; @@ -381,7 +362,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mode = table->mode; - if (!table->child) { + if (!S_ISDIR(table->mode)) { inode->i_mode |= S_IFREG; inode->i_op = &proc_sys_inode_operations; inode->i_fop = &proc_sys_file_operations; @@ -398,7 +379,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) - head = &root_table_header; + head = &sysctl_root_dir.header; return sysctl_head_grab(head); } @@ -406,24 +387,19 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct ctl_table_header *head = grab_header(dir); - struct ctl_table *table = PROC_I(dir)->sysctl_entry; struct ctl_table_header *h = NULL; struct qstr *name = &dentry->d_name; struct ctl_table *p; struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); + struct ctl_dir *ctl_dir; if (IS_ERR(head)) return ERR_CAST(head); - if (table && !table->child) { - WARN_ON(1); - goto out; - } + ctl_dir = container_of(head, struct ctl_dir, header); - table = table ? table->child : &head->ctl_table[1]; - - p = lookup_entry(&h, head, table, name->name, name->len); + p = lookup_entry(&h, ctl_dir, name->name, name->len); if (!p) goto out; @@ -586,21 +562,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; struct ctl_table_header *h = NULL; struct ctl_table *entry; + struct ctl_dir *ctl_dir; unsigned long pos; int ret = -EINVAL; if (IS_ERR(head)) return PTR_ERR(head); - if (table && !table->child) { - WARN_ON(1); - goto out; - } - - table = table ? table->child : &head->ctl_table[1]; + ctl_dir = container_of(head, struct ctl_dir, header); ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -618,7 +589,7 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) { + for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { ret = scan(h, entry, &pos, filp, dirent, filldir); if (ret) { sysctl_head_finish(h); @@ -779,52 +750,86 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; -static struct ctl_table *is_branch_in(struct ctl_table *branch, - struct ctl_table *table) +static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir, + const char *name, int namelen) { - struct ctl_table *p; - const char *s = branch->procname; + struct ctl_table_header *head; + struct ctl_table *entry; - /* branch should have named subdirectory as its first element */ - if (!s || !branch->child) - return NULL; + entry = find_entry(&head, set, dir, name, namelen); + if (!entry) + return ERR_PTR(-ENOENT); + if (S_ISDIR(entry->mode)) + return container_of(head, struct ctl_dir, header); + return ERR_PTR(-ENOTDIR); +} + +static struct ctl_dir *new_dir(struct ctl_table_set *set, + const char *name, int namelen) +{ + struct ctl_table *table; + struct ctl_dir *new; + char *new_name; - /* ... and nothing else */ - if (branch[1].procname) + new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + + namelen + 1, GFP_KERNEL); + if (!new) return NULL; - /* table should contain subdirectory with the same name */ - for (p = table; p->procname; p++) { - if (!p->child) - continue; - if (p->procname && strcmp(p->procname, s) == 0) - return p; - } - return NULL; + table = (struct ctl_table *)(new + 1); + new_name = (char *)(table + 2); + memcpy(new_name, name, namelen); + new_name[namelen] = '\0'; + table[0].procname = new_name; + table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; + init_header(&new->header, set->root, set, table); + + return new; } -/* see if attaching q to p would be an improvement */ -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) +static struct ctl_dir *get_subdir(struct ctl_table_set *set, + struct ctl_dir *dir, const char *name, int namelen) { - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; - struct ctl_table *next; - int is_better = 0; - int not_in_parent = !p->attached_by; - - while ((next = is_branch_in(by, to)) != NULL) { - if (by == q->attached_by) - is_better = 1; - if (to == p->attached_by) - not_in_parent = 1; - by = by->child; - to = next->child; - } + struct ctl_dir *subdir, *new = NULL; - if (is_better && not_in_parent) { - q->attached_by = by; - q->attached_to = to; - q->parent = p; + spin_lock(&sysctl_lock); + subdir = find_subdir(dir->header.set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set) + subdir = find_subdir(set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if (PTR_ERR(subdir) != -ENOENT) + goto failed; + + spin_unlock(&sysctl_lock); + new = new_dir(set, name, namelen); + spin_lock(&sysctl_lock); + subdir = ERR_PTR(-ENOMEM); + if (!new) + goto failed; + + subdir = find_subdir(set, dir, name, namelen); + if (!IS_ERR(subdir)) + goto found; + if (PTR_ERR(subdir) != -ENOENT) + goto failed; + + insert_header(dir, &new->header); + subdir = new; +found: + subdir->header.nreg++; +failed: + if (unlikely(IS_ERR(subdir))) { + printk(KERN_ERR "sysctl could not get directory: %*.*s %ld\n", + namelen, namelen, name, PTR_ERR(subdir)); } + drop_sysctl_table(&dir->header); + if (new) + drop_sysctl_table(&new->header); + spin_unlock(&sysctl_lock); + return subdir; } static int sysctl_check_table_dups(const char *path, struct ctl_table *old, @@ -846,24 +851,14 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, } static int sysctl_check_dups(struct nsproxy *namespaces, - struct ctl_table_header *header, + struct ctl_dir *dir, const char *path, struct ctl_table *table) { struct ctl_table_root *root; struct ctl_table_set *set; - struct ctl_table_header *dir_head, *head; - struct ctl_table *dir_table; + struct ctl_table_header *head; int error = 0; - /* No dups if we are the only member of our directory */ - if (header->attached_by != table) - return 0; - - dir_head = header->parent; - dir_table = header->attached_to; - - error = sysctl_check_table_dups(path, dir_table, table); - root = &sysctl_table_root; do { set = lookup_header_set(root, namespaces); @@ -871,9 +866,9 @@ static int sysctl_check_dups(struct nsproxy *namespaces, list_for_each_entry(head, &set->list, ctl_entry) { if (head->unregistering) continue; - if (head->attached_to != dir_table) + if (head->parent != dir) continue; - error = sysctl_check_table_dups(path, head->attached_by, + error = sysctl_check_table_dups(path, head->ctl_table, table); } root = list_entry(root->root_list.next, @@ -977,47 +972,25 @@ struct ctl_table_header *__register_sysctl_table( const char *path, struct ctl_table *table) { struct ctl_table_header *header; - struct ctl_table *new, **prevp; const char *name, *nextname; - unsigned int npath = 0; struct ctl_table_set *set; - size_t path_bytes = 0; - char *new_name; - - /* Count the path components */ - for (name = path; name; name = nextname) { - int namelen; - nextname = strchr(name, '/'); - if (nextname) { - namelen = nextname - name; - nextname++; - } else { - namelen = strlen(name); - } - if (namelen == 0) - continue; - path_bytes += namelen + 1; - npath++; - } + struct ctl_dir *dir; - /* - * For each path component, allocate a 2-element ctl_table array. - * The first array element will be filled with the sysctl entry - * for this, the second will be the sentinel (procname == 0). - * - * We allocate everything in one go so that we don't have to - * worry about freeing additional memory in unregister_sysctl_table. - */ - header = kzalloc(sizeof(struct ctl_table_header) + path_bytes + - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); + header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!header) return NULL; - new = (struct ctl_table *) (header + 1); - new_name = (char *)(new + (2 * npath)); + init_header(header, root, NULL, table); + if (sysctl_check_table(path, table)) + goto fail; + + spin_lock(&sysctl_lock); + header->set = set = lookup_header_set(root, namespaces); + dir = &sysctl_root_dir; + dir->header.nreg++; + spin_unlock(&sysctl_lock); - /* Now connect the dots */ - prevp = &header->ctl_table; + /* Find the directory for the ctl_table */ for (name = path; name; name = nextname) { int namelen; nextname = strchr(name, '/'); @@ -1029,51 +1002,21 @@ struct ctl_table_header *__register_sysctl_table( } if (namelen == 0) continue; - memcpy(new_name, name, namelen); - new_name[namelen] = '\0'; - - new->procname = new_name; - new->mode = 0555; - - *prevp = new; - prevp = &new->child; - new += 2; - new_name += namelen + 1; + dir = get_subdir(set, dir, name, namelen); + if (IS_ERR(dir)) + goto fail; } - *prevp = table; - - init_header(header, root, NULL, table); - if (sysctl_check_table(path, table)) - goto fail; - spin_lock(&sysctl_lock); - header->set = lookup_header_set(root, namespaces); - header->attached_by = header->ctl_table; - header->attached_to = &root_table[1]; - header->parent = &root_table_header; - set = header->set; - root = header->root; - for (;;) { - struct ctl_table_header *p; - list_for_each_entry(p, &set->list, ctl_entry) { - if (p->unregistering) - continue; - try_attach(p, header); - } - if (root == &sysctl_table_root) - break; - root = list_entry(root->root_list.prev, - struct ctl_table_root, root_list); - set = lookup_header_set(root, namespaces); - } - if (sysctl_check_dups(namespaces, header, path, table)) - goto fail_locked; - insert_header(header); + if (sysctl_check_dups(namespaces, dir, path, table)) + goto fail_put_dir_locked; + insert_header(dir, header); + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; -fail_locked: +fail_put_dir_locked: + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: kfree(header); @@ -1299,16 +1242,17 @@ EXPORT_SYMBOL(register_sysctl_table); static void drop_sysctl_table(struct ctl_table_header *header) { + struct ctl_dir *parent = header->parent; + if (--header->nreg) return; start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree_rcu(header->parent, rcu); - } if (!--header->count) kfree_rcu(header, rcu); + + if (parent) + drop_sysctl_table(&parent->header); } /** diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e73ba33cbf0..3084b624868 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -938,6 +938,7 @@ struct ctl_table; struct nsproxy; struct ctl_table_root; struct ctl_table_header; +struct ctl_dir; typedef struct ctl_table ctl_table; @@ -1040,9 +1041,12 @@ struct ctl_table_header struct ctl_table *ctl_table_arg; struct ctl_table_root *root; struct ctl_table_set *set; - struct ctl_table *attached_by; - struct ctl_table *attached_to; - struct ctl_table_header *parent; + struct ctl_dir *parent; +}; + +struct ctl_dir { + /* Header must be at the start of ctl_dir */ + struct ctl_table_header header; }; struct ctl_table_set { -- cgit v1.2.3-70-g09d2 From 0e47c99d7fe25e0f3907d9f3401079169d904891 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 23:24:30 -0800 Subject: sysctl: Replace root_list with links between sysctl_table_sets. Piecing together directories by looking first in one directory tree, than in another directory tree and finally in a third directory tree makes it hard to verify that some directory entries are not multiply defined and makes it hard to create efficient implementations the sysctl filesystem. Replace the sysctl wide list of roots with autogenerated links from the core sysctl directory tree to the other sysctl directory trees. This simplifies sysctl directory reading and lookups as now only entries in a single sysctl directory tree need to be considered. Benchmark before: make-dummies 0 999 -> 0.44s rmmod dummy -> 0.065s make-dummies 0 9999 -> 1m36s rmmod dummy -> 0.4s Benchmark after: make-dummies 0 999 -> 0.63s rmmod dummy -> 0.12s make-dummies 0 9999 -> 2m35s rmmod dummy -> 18s The slowdown is caused by the lookups used in insert_headers and put_links to see if we need to add links or remove links. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 397 ++++++++++++++++++++++++++++++++++++------------- include/linux/sysctl.h | 3 +- 2 files changed, 296 insertions(+), 104 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a78556514a8..ec54a57c469 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -32,26 +32,26 @@ static struct ctl_table root_table[] = { }, { } }; -static struct ctl_table_root sysctl_table_root; -static struct ctl_dir sysctl_root_dir = { - .header = { +static struct ctl_table_root sysctl_table_root = { + .default_set.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry), + .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table, .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, }, }; -static struct ctl_table_root sysctl_table_root = { - .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(sysctl_root_dir.header.ctl_entry), - .default_set.root = &sysctl_table_root, -}; static DEFINE_SPINLOCK(sysctl_lock); static void drop_sysctl_table(struct ctl_table_header *header); +static int sysctl_follow_link(struct ctl_table_header **phead, + struct ctl_table **pentry, struct nsproxy *namespaces); +static int insert_links(struct ctl_table_header *head); +static void put_links(struct ctl_table_header *header); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -76,9 +76,9 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) } static struct ctl_table *find_entry(struct ctl_table_header **phead, - struct ctl_table_set *set, struct ctl_dir *dir, - const char *name, int namelen) + struct ctl_dir *dir, const char *name, int namelen) { + struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; struct ctl_table *entry; @@ -119,11 +119,21 @@ static void erase_header(struct ctl_table_header *head) list_del_init(&head->ctl_entry); } -static void insert_header(struct ctl_dir *dir, struct ctl_table_header *header) +static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { + int err; + + dir->header.nreg++; header->parent = dir; - header->parent->header.nreg++; + err = insert_links(header); + if (err) + goto fail_links; list_add_tail(&header->ctl_entry, &header->set->list); + return 0; +fail_links: + header->parent = NULL; + drop_sysctl_table(&dir->header); + return err; } /* called under sysctl_lock */ @@ -212,72 +222,39 @@ lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) return set; } -static struct list_head * -lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = lookup_header_set(root, namespaces); - return &set->list; -} - static struct ctl_table *lookup_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - struct ctl_table_root *root; - struct ctl_table_set *set; spin_lock(&sysctl_lock); - root = &sysctl_table_root; - do { - set = lookup_header_set(root, current->nsproxy); - entry = find_entry(&head, set, dir, name, namelen); - if (entry && use_table(head)) - *phead = head; - else - entry = NULL; - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - } while (!entry && root != &sysctl_table_root); + entry = find_entry(&head, dir, name, namelen); + if (entry && use_table(head)) + *phead = head; + else + entry = NULL; spin_unlock(&sysctl_lock); return entry; } static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, - struct ctl_table_root *root, struct list_head *tmp) + struct list_head *tmp) { - struct nsproxy *namespaces = current->nsproxy; - struct list_head *header_list; + struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; - goto next; - for (;;) { + for (tmp = tmp->next; tmp != &set->list; tmp = tmp->next) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); - root = head->root; if (head->parent != dir || !head->ctl_table->procname || !use_table(head)) - goto next; - - return head; - next: - tmp = tmp->next; - header_list = lookup_header_list(root, namespaces); - if (tmp != header_list) continue; - do { - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - if (root == &sysctl_table_root) - goto out; - header_list = lookup_header_list(root, namespaces); - } while (list_empty(header_list)); - tmp = header_list->next; + return head; } -out: return NULL; } @@ -288,8 +265,7 @@ static void first_entry(struct ctl_dir *dir, struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &sysctl_table_root, - &sysctl_table_root.default_set.list); + head = next_usable_entry(dir, &dir->header.set->list); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -306,7 +282,7 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr if (!entry->procname) { spin_lock(&sysctl_lock); unuse_table(head); - head = next_usable_entry(head->parent, head->root, &head->ctl_entry); + head = next_usable_entry(head->parent, &head->ctl_entry); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -317,9 +293,6 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr void register_sysctl_root(struct ctl_table_root *root) { - spin_lock(&sysctl_lock); - list_add_tail(&root->root_list, &sysctl_table_root.root_list); - spin_unlock(&sysctl_lock); } /* @@ -386,7 +359,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) { struct ctl_table_header *head = PROC_I(inode)->sysctl; if (!head) - head = &sysctl_root_dir.header; + head = &sysctl_table_root.default_set.dir.header; return sysctl_head_grab(head); } @@ -400,6 +373,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode; struct dentry *err = ERR_PTR(-ENOENT); struct ctl_dir *ctl_dir; + int ret; if (IS_ERR(head)) return ERR_CAST(head); @@ -410,6 +384,11 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, if (!p) goto out; + ret = sysctl_follow_link(&h, &p, current->nsproxy); + err = ERR_PTR(ret); + if (ret) + goto out; + err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); if (h) @@ -547,6 +526,25 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); } +static int proc_sys_link_fill_cache(struct file *filp, void *dirent, + filldir_t filldir, + struct ctl_table_header *head, + struct ctl_table *table) +{ + int err, ret = 0; + head = sysctl_head_grab(head); + + /* It is not an error if we can not follow the link ignore it */ + err = sysctl_follow_link(&head, &table, current->nsproxy); + if (err) + goto out; + + ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); +out: + sysctl_head_finish(head); + return ret; +} + static int scan(struct ctl_table_header *head, ctl_table *table, unsigned long *pos, struct file *file, void *dirent, filldir_t filldir) @@ -556,7 +554,10 @@ static int scan(struct ctl_table_header *head, ctl_table *table, if ((*pos)++ < file->f_pos) return 0; - res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (unlikely(S_ISLNK(table->mode))) + res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); + else + res = proc_sys_fill_cache(file, dirent, filldir, head, table); if (res == 0) file->f_pos = *pos; @@ -757,13 +758,13 @@ static const struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; -static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *dir, - const char *name, int namelen) +static struct ctl_dir *find_subdir(struct ctl_dir *dir, + const char *name, int namelen) { struct ctl_table_header *head; struct ctl_table *entry; - entry = find_entry(&head, set, dir, name, namelen); + entry = find_entry(&head, dir, name, namelen); if (!entry) return ERR_PTR(-ENOENT); if (S_ISDIR(entry->mode)) @@ -772,7 +773,7 @@ static struct ctl_dir *find_subdir(struct ctl_table_set *set, struct ctl_dir *di } static struct ctl_dir *new_dir(struct ctl_table_set *set, - const char *name, int namelen) + const char *name, int namelen) { struct ctl_table *table; struct ctl_dir *new; @@ -789,22 +790,19 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, new_name[namelen] = '\0'; table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; - init_header(&new->header, set->root, set, table); + init_header(&new->header, set->dir.header.root, set, table); return new; } -static struct ctl_dir *get_subdir(struct ctl_table_set *set, - struct ctl_dir *dir, const char *name, int namelen) +static struct ctl_dir *get_subdir(struct ctl_dir *dir, + const char *name, int namelen) { + struct ctl_table_set *set = dir->header.set; struct ctl_dir *subdir, *new = NULL; spin_lock(&sysctl_lock); - subdir = find_subdir(dir->header.set, dir, name, namelen); - if (!IS_ERR(subdir)) - goto found; - if ((PTR_ERR(subdir) == -ENOENT) && set != dir->header.set) - subdir = find_subdir(set, dir, name, namelen); + subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) @@ -817,13 +815,14 @@ static struct ctl_dir *get_subdir(struct ctl_table_set *set, if (!new) goto failed; - subdir = find_subdir(set, dir, name, namelen); + subdir = find_subdir(dir, name, namelen); if (!IS_ERR(subdir)) goto found; if (PTR_ERR(subdir) != -ENOENT) goto failed; - insert_header(dir, &new->header); + if (insert_header(dir, &new->header)) + goto failed; subdir = new; found: subdir->header.nreg++; @@ -841,6 +840,57 @@ failed: return subdir; } +static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) +{ + struct ctl_dir *parent; + const char *procname; + if (!dir->header.parent) + return &set->dir; + parent = xlate_dir(set, dir->header.parent); + if (IS_ERR(parent)) + return parent; + procname = dir->header.ctl_table[0].procname; + return find_subdir(parent, procname, strlen(procname)); +} + +static int sysctl_follow_link(struct ctl_table_header **phead, + struct ctl_table **pentry, struct nsproxy *namespaces) +{ + struct ctl_table_header *head; + struct ctl_table_root *root; + struct ctl_table_set *set; + struct ctl_table *entry; + struct ctl_dir *dir; + int ret; + + /* Get out quickly if not a link */ + if (!S_ISLNK((*pentry)->mode)) + return 0; + + ret = 0; + spin_lock(&sysctl_lock); + root = (*pentry)->data; + set = lookup_header_set(root, namespaces); + dir = xlate_dir(set, (*phead)->parent); + if (IS_ERR(dir)) + ret = PTR_ERR(dir); + else { + const char *procname = (*pentry)->procname; + head = NULL; + entry = find_entry(&head, dir, procname, strlen(procname)); + ret = -ENOENT; + if (entry && use_table(head)) { + unuse_table(*phead); + *phead = head; + *pentry = entry; + ret = 0; + } + } + + spin_unlock(&sysctl_lock); + return ret; +} + static int sysctl_check_table_dups(const char *path, struct ctl_table *old, struct ctl_table *table) { @@ -859,30 +909,21 @@ static int sysctl_check_table_dups(const char *path, struct ctl_table *old, return error; } -static int sysctl_check_dups(struct nsproxy *namespaces, - struct ctl_dir *dir, +static int sysctl_check_dups(struct ctl_dir *dir, const char *path, struct ctl_table *table) { - struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_table_header *head; int error = 0; - root = &sysctl_table_root; - do { - set = lookup_header_set(root, namespaces); - - list_for_each_entry(head, &set->list, ctl_entry) { - if (head->unregistering) - continue; - if (head->parent != dir) - continue; - error = sysctl_check_table_dups(path, head->ctl_table, - table); - } - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - } while (root != &sysctl_table_root); + set = dir->header.set; + list_for_each_entry(head, &set->list, ctl_entry) { + if (head->unregistering) + continue; + if (head->parent != dir) + continue; + error = sysctl_check_table_dups(path, head->ctl_table, table); + } return error; } @@ -932,6 +973,115 @@ static int sysctl_check_table(const char *path, struct ctl_table *table) return err; } +static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, + struct ctl_table_root *link_root) +{ + struct ctl_table *link_table, *entry, *link; + struct ctl_table_header *links; + char *link_name; + int nr_entries, name_bytes; + + name_bytes = 0; + nr_entries = 0; + for (entry = table; entry->procname; entry++) { + nr_entries++; + name_bytes += strlen(entry->procname) + 1; + } + + links = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_table)*(nr_entries + 1) + + name_bytes, + GFP_KERNEL); + + if (!links) + return NULL; + + link_table = (struct ctl_table *)(links + 1); + link_name = (char *)&link_table[nr_entries + 1]; + + for (link = link_table, entry = table; entry->procname; link++, entry++) { + int len = strlen(entry->procname) + 1; + memcpy(link_name, entry->procname, len); + link->procname = link_name; + link->mode = S_IFLNK|S_IRWXUGO; + link->data = link_root; + link_name += len; + } + init_header(links, dir->header.root, dir->header.set, link_table); + links->nreg = nr_entries; + + return links; +} + +static bool get_links(struct ctl_dir *dir, + struct ctl_table *table, struct ctl_table_root *link_root) +{ + struct ctl_table_header *head; + struct ctl_table *entry, *link; + + /* Are there links available for every entry in table? */ + for (entry = table; entry->procname; entry++) { + const char *procname = entry->procname; + link = find_entry(&head, dir, procname, strlen(procname)); + if (!link) + return false; + if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) + continue; + if (S_ISLNK(link->mode) && (link->data == link_root)) + continue; + return false; + } + + /* The checks passed. Increase the registration count on the links */ + for (entry = table; entry->procname; entry++) { + const char *procname = entry->procname; + link = find_entry(&head, dir, procname, strlen(procname)); + head->nreg++; + } + return true; +} + +static int insert_links(struct ctl_table_header *head) +{ + struct ctl_table_set *root_set = &sysctl_table_root.default_set; + struct ctl_dir *core_parent = NULL; + struct ctl_table_header *links; + int err; + + if (head->set == root_set) + return 0; + + core_parent = xlate_dir(root_set, head->parent); + if (IS_ERR(core_parent)) + return 0; + + if (get_links(core_parent, head->ctl_table, head->root)) + return 0; + + core_parent->header.nreg++; + spin_unlock(&sysctl_lock); + + links = new_links(core_parent, head->ctl_table, head->root); + + spin_lock(&sysctl_lock); + err = -ENOMEM; + if (!links) + goto out; + + err = 0; + if (get_links(core_parent, head->ctl_table, head->root)) { + kfree(links); + goto out; + } + + err = insert_header(core_parent, links); + if (err) + kfree(links); +out: + drop_sysctl_table(&core_parent->header); + return err; +} + /** * __register_sysctl_table - register a leaf sysctl table * @root: List of sysctl headers to register on @@ -980,6 +1130,7 @@ struct ctl_table_header *__register_sysctl_table( struct nsproxy *namespaces, const char *path, struct ctl_table *table) { + struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; struct ctl_table_set *set; @@ -995,7 +1146,7 @@ struct ctl_table_header *__register_sysctl_table( spin_lock(&sysctl_lock); header->set = set = lookup_header_set(root, namespaces); - dir = &sysctl_root_dir; + dir = &set->dir; dir->header.nreg++; spin_unlock(&sysctl_lock); @@ -1012,22 +1163,28 @@ struct ctl_table_header *__register_sysctl_table( if (namelen == 0) continue; - dir = get_subdir(set, dir, name, namelen); + dir = get_subdir(dir, name, namelen); if (IS_ERR(dir)) goto fail; } + spin_lock(&sysctl_lock); - if (sysctl_check_dups(namespaces, dir, path, table)) + if (sysctl_check_dups(dir, path, table)) + goto fail_put_dir_locked; + + if (insert_header(dir, header)) goto fail_put_dir_locked; - insert_header(dir, header); + drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); return header; + fail_put_dir_locked: drop_sysctl_table(&dir->header); spin_unlock(&sysctl_lock); fail: + kfree(links); kfree(header); dump_stack(); return NULL; @@ -1249,6 +1406,40 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table) } EXPORT_SYMBOL(register_sysctl_table); +static void put_links(struct ctl_table_header *header) +{ + struct ctl_table_set *root_set = &sysctl_table_root.default_set; + struct ctl_table_root *root = header->root; + struct ctl_dir *parent = header->parent; + struct ctl_dir *core_parent; + struct ctl_table *entry; + + if (header->set == root_set) + return; + + core_parent = xlate_dir(root_set, parent); + if (IS_ERR(core_parent)) + return; + + for (entry = header->ctl_table; entry->procname; entry++) { + struct ctl_table_header *link_head; + struct ctl_table *link; + const char *name = entry->procname; + + link = find_entry(&link_head, core_parent, name, strlen(name)); + if (link && + ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || + (S_ISLNK(link->mode) && (link->data == root)))) { + drop_sysctl_table(link_head); + } + else { + printk(KERN_ERR "sysctl link missing during unregister: "); + sysctl_print_dir(parent); + printk(KERN_CONT "/%s\n", name); + } + } +} + static void drop_sysctl_table(struct ctl_table_header *header) { struct ctl_dir *parent = header->parent; @@ -1256,6 +1447,7 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); @@ -1301,13 +1493,14 @@ void unregister_sysctl_table(struct ctl_table_header * header) } EXPORT_SYMBOL(unregister_sysctl_table); -void setup_sysctl_set(struct ctl_table_set *p, +void setup_sysctl_set(struct ctl_table_set *set, struct ctl_table_root *root, int (*is_seen)(struct ctl_table_set *)) { - INIT_LIST_HEAD(&p->list); - p->root = root; - p->is_seen = is_seen; + memset(set, sizeof(*set), 0); + INIT_LIST_HEAD(&set->list); + set->is_seen = is_seen; + init_header(&set->dir.header, root, set, root_table); } void retire_sysctl_set(struct ctl_table_set *set) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 3084b624868..2a1446a9609 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1051,12 +1051,11 @@ struct ctl_dir { struct ctl_table_set { struct list_head list; - struct ctl_table_root *root; int (*is_seen)(struct ctl_table_set *); + struct ctl_dir dir; }; struct ctl_table_root { - struct list_head root_list; struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); -- cgit v1.2.3-70-g09d2 From 60a47a2e823cbe6b609346bffff61a00c0c76470 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 8 Jan 2012 00:02:37 -0800 Subject: sysctl: Modify __register_sysctl_paths to take a set instead of a root and an nsproxy An nsproxy argument here has always been awkard and now the nsproxy argument is completely unnecessary so remove it, replacing it with the set we want the registered tables to show up in. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 30 ++++++++++++------------------ include/linux/sysctl.h | 4 ++-- net/sysctl_net.c | 10 +++------- 3 files changed, 17 insertions(+), 27 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ec54a57c469..e0d3e7e59cb 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1084,8 +1084,7 @@ out: /** * __register_sysctl_table - register a leaf sysctl table - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible + * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure * @@ -1126,26 +1125,24 @@ out: * to the table header on success. */ struct ctl_table_header *__register_sysctl_table( - struct ctl_table_root *root, - struct nsproxy *namespaces, + struct ctl_table_set *set, const char *path, struct ctl_table *table) { + struct ctl_table_root *root = set->dir.header.root; struct ctl_table_header *links = NULL; struct ctl_table_header *header; const char *name, *nextname; - struct ctl_table_set *set; struct ctl_dir *dir; header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!header) return NULL; - init_header(header, root, NULL, table); + init_header(header, root, set, table); if (sysctl_check_table(path, table)) goto fail; spin_lock(&sysctl_lock); - header->set = set = lookup_header_set(root, namespaces); dir = &set->dir; dir->header.nreg++; spin_unlock(&sysctl_lock); @@ -1223,8 +1220,7 @@ static int count_subheaders(struct ctl_table *table) } static int register_leaf_sysctl_tables(const char *path, char *pos, - struct ctl_table_header ***subheader, - struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table_header ***subheader, struct ctl_table_set *set, struct ctl_table *table) { struct ctl_table *ctl_table_arg = NULL; @@ -1261,7 +1257,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, /* Register everything except a directory full of subdirectories */ if (nr_files || !nr_dirs) { struct ctl_table_header *header; - header = __register_sysctl_table(root, namespaces, path, files); + header = __register_sysctl_table(set, path, files); if (!header) { kfree(ctl_table_arg); goto out; @@ -1286,7 +1282,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos, goto out; err = register_leaf_sysctl_tables(path, child_pos, subheader, - root, namespaces, entry->child); + set, entry->child); pos[0] = '\0'; if (err) goto out; @@ -1299,8 +1295,7 @@ out: /** * __register_sysctl_paths - register a sysctl table hierarchy - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible + * @set: Sysctl tree to register on * @path: The path to the directory the sysctl table is in. * @table: the top-level table structure * @@ -1310,8 +1305,7 @@ out: * See __register_sysctl_table for more details. */ struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, - struct nsproxy *namespaces, + struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table) { struct ctl_table *ctl_table_arg = table; @@ -1337,7 +1331,7 @@ struct ctl_table_header *__register_sysctl_paths( table = table->child; } if (nr_subheaders == 1) { - header = __register_sysctl_table(root, namespaces, new_path, table); + header = __register_sysctl_table(set, new_path, table); if (header) header->ctl_table_arg = ctl_table_arg; } else { @@ -1351,7 +1345,7 @@ struct ctl_table_header *__register_sysctl_paths( header->ctl_table_arg = ctl_table_arg; if (register_leaf_sysctl_tables(new_path, pos, &subheader, - root, namespaces, table)) + set, table)) goto err_register_leaves; } @@ -1384,7 +1378,7 @@ err_register_leaves: struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table) { - return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, + return __register_sysctl_paths(&sysctl_table_root.default_set, path, table); } EXPORT_SYMBOL(register_sysctl_paths); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 2a1446a9609..cec59415b3c 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1079,10 +1079,10 @@ extern void retire_sysctl_set(struct ctl_table_set *set); void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_table( - struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table_set *set, const char *path, struct ctl_table *table); struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, struct nsproxy *namespaces, + struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table); struct ctl_table_header *register_sysctl_table(struct ctl_table * table); struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, diff --git a/net/sysctl_net.c b/net/sysctl_net.c index e998c644804..c3e65aebecc 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -105,19 +105,15 @@ subsys_initcall(net_sysctl_init); struct ctl_table_header *register_net_sysctl_table(struct net *net, const struct ctl_path *path, struct ctl_table *table) { - struct nsproxy namespaces; - namespaces = *current->nsproxy; - namespaces.net_ns = net; - return __register_sysctl_paths(&net_sysctl_root, - &namespaces, path, table); + return __register_sysctl_paths(&net->sysctls, path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl_table); struct ctl_table_header *register_net_sysctl_rotable(const struct ctl_path *path, struct ctl_table *table) { - return __register_sysctl_paths(&net_sysctl_ro_root, - &init_nsproxy, path, table); + return __register_sysctl_paths(&net_sysctl_ro_root.default_set, + path, table); } EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); -- cgit v1.2.3-70-g09d2 From 9e3d47df35abd6430fed04fb40a76c7358b1e815 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Jan 2012 23:45:12 -0800 Subject: sysctl: Make the header lists per directory. Slightly enhance efficiency and clarity of the code by making the header list per directory instead of per set. Benchmark before: make-dummies 0 999 -> 0.63s rmmod dummy -> 0.12s make-dummies 0 9999 -> 2m35s rmmod dummy -> 18s Benchmark after: make-dummies 0 999 -> 0.32s rmmod dummy -> 0.12s make-dummies 0 9999 -> 1m17s rmmod dummy -> 17s Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 28 +++++++++++----------------- include/linux/sysctl.h | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 160d5781638..e971ccccac4 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -33,12 +33,12 @@ static struct ctl_table root_table[] = { { } }; static struct ctl_table_root sysctl_table_root = { - .default_set.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry), + .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list), .default_set.dir.header = { {{.count = 1, .nreg = 1, .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}}, + .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, @@ -79,15 +79,12 @@ static int namecmp(const char *name1, int len1, const char *name2, int len2) static struct ctl_table *find_entry(struct ctl_table_header **phead, struct ctl_dir *dir, const char *name, int namelen) { - struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; struct ctl_table *entry; - list_for_each_entry(head, &set->list, ctl_entry) { + list_for_each_entry(head, &dir->list, ctl_entry) { if (head->unregistering) continue; - if (head->parent != dir) - continue; for (entry = head->ctl_table; entry->procname; entry++) { const char *procname = entry->procname; if (namecmp(procname, strlen(procname), name, namelen) == 0) { @@ -133,7 +130,7 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) err = insert_links(header); if (err) goto fail_links; - list_add_tail(&header->ctl_entry, &header->set->list); + list_add_tail(&header->ctl_entry, &header->parent->list); return 0; fail_links: header->parent = NULL; @@ -247,14 +244,12 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, struct list_head *tmp) { - struct ctl_table_set *set = dir->header.set; struct ctl_table_header *head; - for (tmp = tmp->next; tmp != &set->list; tmp = tmp->next) { + for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) { head = list_entry(tmp, struct ctl_table_header, ctl_entry); - if (head->parent != dir || - !head->ctl_table->procname || + if (!head->ctl_table->procname || !use_table(head)) continue; @@ -270,7 +265,7 @@ static void first_entry(struct ctl_dir *dir, struct ctl_table *entry = NULL; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &dir->header.set->list); + head = next_usable_entry(dir, &dir->list); spin_unlock(&sysctl_lock); if (head) entry = head->ctl_table; @@ -793,6 +788,7 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, new_name = (char *)(table + 2); memcpy(new_name, name, namelen); new_name[namelen] = '\0'; + INIT_LIST_HEAD(&new->list); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; init_header(&new->header, set->dir.header.root, set, table); @@ -917,12 +913,10 @@ static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) { - struct ctl_table_set *set; struct ctl_table_header *head; int error = 0; - set = dir->header.set; - list_for_each_entry(head, &set->list, ctl_entry) { + list_for_each_entry(head, &dir->list, ctl_entry) { if (head->unregistering) continue; if (head->parent != dir) @@ -1494,14 +1488,14 @@ void setup_sysctl_set(struct ctl_table_set *set, int (*is_seen)(struct ctl_table_set *)) { memset(set, sizeof(*set), 0); - INIT_LIST_HEAD(&set->list); set->is_seen = is_seen; + INIT_LIST_HEAD(&set->dir.list); init_header(&set->dir.header, root, set, root_table); } void retire_sysctl_set(struct ctl_table_set *set) { - WARN_ON(!list_empty(&set->list)); + WARN_ON(!list_empty(&set->dir.list)); } int __init proc_sys_init(void) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index cec59415b3c..36dec756ef9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1047,10 +1047,10 @@ struct ctl_table_header struct ctl_dir { /* Header must be at the start of ctl_dir */ struct ctl_table_header header; + struct list_head list; }; struct ctl_table_set { - struct list_head list; int (*is_seen)(struct ctl_table_set *); struct ctl_dir dir; }; -- cgit v1.2.3-70-g09d2 From ac13ac6f4c6c0504d2c927862216f4e422a2c0b5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 9 Jan 2012 17:24:30 -0800 Subject: sysctl: Index sysctl directories with rbtrees. One of the most important jobs of sysctl is to export network stack tunables. Several of those tunables are per network device. In several instances people are running with 1000+ network devices in there network stacks, which makes the simple per directory linked list in sysctl a scaling bottleneck. Replace O(N^2) sysctl insertion and lookup times with O(NlogN) by using an rbtree to index the sysctl directories. Benchmark before: make-dummies 0 999 -> 0.32s rmmod dummy -> 0.12s make-dummies 0 9999 -> 1m17s rmmod dummy -> 17s Benchmark after: make-dummies 0 999 -> 0.074s rmmod dummy -> 0.070s make-dummies 0 9999 -> 3.4s rmmod dummy -> 0.44s Benchmark after (without dev_snmp6): make-dummies 0 9999 -> 0.75s rmmod dummy -> 0.44s make-dummies 0 99999 -> 11s rmmod dummy -> 4.3s At 10,000 dummy devices the bottleneck becomes the time to add and remove the files under /proc/sys/net/dev_snmp6. I have commented out the code that adds and removes files under /proc/sys/net/dev_snmp6 and taken measurments of creating and destroying 100,000 dummies to verify the sysctl continues to scale. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 224 +++++++++++++++++++++++++++++-------------------- include/linux/sysctl.h | 10 ++- 2 files changed, 142 insertions(+), 92 deletions(-) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e971ccccac4..05c393a5c53 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -33,12 +33,10 @@ static struct ctl_table root_table[] = { { } }; static struct ctl_table_root sysctl_table_root = { - .default_set.dir.list = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.list), .default_set.dir.header = { {{.count = 1, .nreg = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.dir.header.ctl_entry),}}, + .ctl_table = root_table }}, .ctl_table_arg = root_table, .root = &sysctl_table_root, .set = &sysctl_table_root.default_set, @@ -52,7 +50,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, struct ctl_table **pentry, struct nsproxy *namespaces); static int insert_links(struct ctl_table_header *head); static void put_links(struct ctl_table_header *header); -static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table); static void sysctl_print_dir(struct ctl_dir *dir) { @@ -81,28 +78,83 @@ static struct ctl_table *find_entry(struct ctl_table_header **phead, { struct ctl_table_header *head; struct ctl_table *entry; + struct rb_node *node = dir->root.rb_node; - list_for_each_entry(head, &dir->list, ctl_entry) { - if (head->unregistering) - continue; - for (entry = head->ctl_table; entry->procname; entry++) { - const char *procname = entry->procname; - if (namecmp(procname, strlen(procname), name, namelen) == 0) { - *phead = head; - return entry; - } + while (node) + { + struct ctl_node *ctl_node; + const char *procname; + int cmp; + + ctl_node = rb_entry(node, struct ctl_node, node); + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; + procname = entry->procname; + + cmp = namecmp(name, namelen, procname, strlen(procname)); + if (cmp < 0) + node = node->rb_left; + else if (cmp > 0) + node = node->rb_right; + else { + *phead = head; + return entry; } } return NULL; } +static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) +{ + struct rb_node *node = &head->node[entry - head->ctl_table].node; + struct rb_node **p = &head->parent->root.rb_node; + struct rb_node *parent = NULL; + const char *name = entry->procname; + int namelen = strlen(name); + + while (*p) { + struct ctl_table_header *parent_head; + struct ctl_table *parent_entry; + struct ctl_node *parent_node; + const char *parent_name; + int cmp; + + parent = *p; + parent_node = rb_entry(parent, struct ctl_node, node); + parent_head = parent_node->header; + parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; + parent_name = parent_entry->procname; + + cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else { + printk(KERN_ERR "sysctl duplicate entry: "); + sysctl_print_dir(head->parent); + printk(KERN_CONT "/%s\n", entry->procname); + return -EEXIST; + } + } + + rb_link_node(node, parent, p); + return 0; +} + +static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) +{ + struct rb_node *node = &head->node[entry - head->ctl_table].node; + + rb_erase(node, &head->parent->root); +} + static void init_header(struct ctl_table_header *head, struct ctl_table_root *root, struct ctl_table_set *set, - struct ctl_table *table) + struct ctl_node *node, struct ctl_table *table) { head->ctl_table = table; head->ctl_table_arg = table; - INIT_LIST_HEAD(&head->ctl_entry); head->used = 0; head->count = 1; head->nreg = 1; @@ -110,28 +162,42 @@ static void init_header(struct ctl_table_header *head, head->root = root; head->set = set; head->parent = NULL; + head->node = node; + if (node) { + struct ctl_table *entry; + for (entry = table; entry->procname; entry++, node++) { + rb_init_node(&node->node); + node->header = head; + } + } } static void erase_header(struct ctl_table_header *head) { - list_del_init(&head->ctl_entry); + struct ctl_table *entry; + for (entry = head->ctl_table; entry->procname; entry++) + erase_entry(head, entry); } static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) { + struct ctl_table *entry; int err; - err = sysctl_check_dups(dir, header->ctl_table); - if (err) - return err; - dir->header.nreg++; header->parent = dir; err = insert_links(header); if (err) goto fail_links; - list_add_tail(&header->ctl_entry, &header->parent->list); + for (entry = header->ctl_table; entry->procname; entry++) { + err = insert_entry(header, entry); + if (err) + goto fail; + } return 0; +fail: + erase_header(header); + put_links(header); fail_links: header->parent = NULL; drop_sysctl_table(&dir->header); @@ -241,19 +307,14 @@ static struct ctl_table *lookup_entry(struct ctl_table_header **phead, return entry; } -static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, - struct list_head *tmp) +static struct ctl_node *first_usable_entry(struct rb_node *node) { - struct ctl_table_header *head; - - for (tmp = tmp->next; tmp != &dir->list; tmp = tmp->next) { - head = list_entry(tmp, struct ctl_table_header, ctl_entry); + struct ctl_node *ctl_node; - if (!head->ctl_table->procname || - !use_table(head)) - continue; - - return head; + for (;node; node = rb_next(node)) { + ctl_node = rb_entry(node, struct ctl_node, node); + if (use_table(ctl_node->header)) + return ctl_node; } return NULL; } @@ -261,14 +322,17 @@ static struct ctl_table_header *next_usable_entry(struct ctl_dir *dir, static void first_entry(struct ctl_dir *dir, struct ctl_table_header **phead, struct ctl_table **pentry) { - struct ctl_table_header *head; + struct ctl_table_header *head = NULL; struct ctl_table *entry = NULL; + struct ctl_node *ctl_node; spin_lock(&sysctl_lock); - head = next_usable_entry(dir, &dir->list); + ctl_node = first_usable_entry(rb_first(&dir->root)); spin_unlock(&sysctl_lock); - if (head) - entry = head->ctl_table; + if (ctl_node) { + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; + } *phead = head; *pentry = entry; } @@ -277,15 +341,17 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr { struct ctl_table_header *head = *phead; struct ctl_table *entry = *pentry; + struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; - entry++; - if (!entry->procname) { - spin_lock(&sysctl_lock); - unuse_table(head); - head = next_usable_entry(head->parent, &head->ctl_entry); - spin_unlock(&sysctl_lock); - if (head) - entry = head->ctl_table; + spin_lock(&sysctl_lock); + unuse_table(head); + + ctl_node = first_usable_entry(rb_next(&ctl_node->node)); + spin_unlock(&sysctl_lock); + head = NULL; + if (ctl_node) { + head = ctl_node->header; + entry = &head->ctl_table[ctl_node - head->node]; } *phead = head; *pentry = entry; @@ -777,21 +843,23 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set, { struct ctl_table *table; struct ctl_dir *new; + struct ctl_node *node; char *new_name; - new = kzalloc(sizeof(*new) + sizeof(struct ctl_table)*2 + - namelen + 1, GFP_KERNEL); + new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + + sizeof(struct ctl_table)*2 + namelen + 1, + GFP_KERNEL); if (!new) return NULL; - table = (struct ctl_table *)(new + 1); + node = (struct ctl_node *)(new + 1); + table = (struct ctl_table *)(node + 1); new_name = (char *)(table + 2); memcpy(new_name, name, namelen); new_name[namelen] = '\0'; - INIT_LIST_HEAD(&new->list); table[0].procname = new_name; table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; - init_header(&new->header, set->dir.header.root, set, table); + init_header(&new->header, set->dir.header.root, set, node, table); return new; } @@ -892,40 +960,6 @@ static int sysctl_follow_link(struct ctl_table_header **phead, return ret; } -static int sysctl_check_table_dups(struct ctl_dir *dir, struct ctl_table *old, - struct ctl_table *table) -{ - struct ctl_table *entry, *test; - int error = 0; - - for (entry = old; entry->procname; entry++) { - for (test = table; test->procname; test++) { - if (strcmp(entry->procname, test->procname) == 0) { - printk(KERN_ERR "sysctl duplicate entry: "); - sysctl_print_dir(dir); - printk(KERN_CONT "/%s\n", test->procname); - error = -EEXIST; - } - } - } - return error; -} - -static int sysctl_check_dups(struct ctl_dir *dir, struct ctl_table *table) -{ - struct ctl_table_header *head; - int error = 0; - - list_for_each_entry(head, &dir->list, ctl_entry) { - if (head->unregistering) - continue; - if (head->parent != dir) - continue; - error = sysctl_check_table_dups(dir, head->ctl_table, table); - } - return error; -} - static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) { struct va_format vaf; @@ -977,6 +1011,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table { struct ctl_table *link_table, *entry, *link; struct ctl_table_header *links; + struct ctl_node *node; char *link_name; int nr_entries, name_bytes; @@ -988,6 +1023,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table } links = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_node)*nr_entries + sizeof(struct ctl_table)*(nr_entries + 1) + name_bytes, GFP_KERNEL); @@ -995,7 +1031,8 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table if (!links) return NULL; - link_table = (struct ctl_table *)(links + 1); + node = (struct ctl_node *)(links + 1); + link_table = (struct ctl_table *)(node + nr_entries); link_name = (char *)&link_table[nr_entries + 1]; for (link = link_table, entry = table; entry->procname; link++, entry++) { @@ -1006,7 +1043,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table link->data = link_root; link_name += len; } - init_header(links, dir->header.root, dir->header.set, link_table); + init_header(links, dir->header.root, dir->header.set, node, link_table); links->nreg = nr_entries; return links; @@ -1132,12 +1169,20 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_header *header; const char *name, *nextname; struct ctl_dir *dir; + struct ctl_table *entry; + struct ctl_node *node; + int nr_entries = 0; + + for (entry = table; entry->procname; entry++) + nr_entries++; - header = kzalloc(sizeof(struct ctl_table_header), GFP_KERNEL); + header = kzalloc(sizeof(struct ctl_table_header) + + sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); if (!header) return NULL; - init_header(header, root, set, table); + node = (struct ctl_node *)(header + 1); + init_header(header, root, set, node, table); if (sysctl_check_table(path, table)) goto fail; @@ -1489,13 +1534,12 @@ void setup_sysctl_set(struct ctl_table_set *set, { memset(set, sizeof(*set), 0); set->is_seen = is_seen; - INIT_LIST_HEAD(&set->dir.list); - init_header(&set->dir.header, root, set, root_table); + init_header(&set->dir.header, root, set, NULL, root_table); } void retire_sysctl_set(struct ctl_table_set *set) { - WARN_ON(!list_empty(&set->dir.list)); + WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); } int __init proc_sys_init(void) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 36dec756ef9..35c50ed36fc 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -932,6 +932,7 @@ enum #include #include #include +#include /* For the /proc/sys support */ struct ctl_table; @@ -1023,6 +1024,11 @@ struct ctl_table void *extra2; }; +struct ctl_node { + struct rb_node node; + struct ctl_table_header *header; +}; + /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ struct ctl_table_header @@ -1030,7 +1036,6 @@ struct ctl_table_header union { struct { struct ctl_table *ctl_table; - struct list_head ctl_entry; int used; int count; int nreg; @@ -1042,12 +1047,13 @@ struct ctl_table_header struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_dir *parent; + struct ctl_node *node; }; struct ctl_dir { /* Header must be at the start of ctl_dir */ struct ctl_table_header header; - struct list_head list; + struct rb_root root; }; struct ctl_table_set { -- cgit v1.2.3-70-g09d2 From fea478d4101a4285aa25c5bafaaf4cec35026fe0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 Jan 2012 21:47:03 -0800 Subject: sysctl: Add register_sysctl for normal sysctl users The plan is to convert all callers of register_sysctl_table and register_sysctl_paths to register_sysctl. The interface to register_sysctl is enough nicer this should make the callers a bit more readable. Additionally after the conversion the 230 lines of backwards compatibility can be removed. Signed-off-by: Eric W. Biederman --- fs/proc/proc_sysctl.c | 17 +++++++++++++++++ include/linux/sysctl.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include/linux/sysctl.h') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 05c393a5c53..8dc7f0e46e7 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1228,6 +1228,23 @@ fail: return NULL; } +/** + * register_sysctl - register a sysctl table + * @path: The path to the directory the sysctl table is in. + * @table: the table structure + * + * Register a sysctl table. @table should be a filled in ctl_table + * array. A completely 0 filled entry terminates the table. + * + * See __register_sysctl_table for more details. + */ +struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) +{ + return __register_sysctl_table(&sysctl_table_root.default_set, + path, table); +} +EXPORT_SYMBOL(register_sysctl); + static char *append_path(const char *path, char *pos, const char *name) { int namelen; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 35c50ed36fc..c34b4c82b0d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -1090,6 +1090,7 @@ struct ctl_table_header *__register_sysctl_table( struct ctl_table_header *__register_sysctl_paths( struct ctl_table_set *set, const struct ctl_path *path, struct ctl_table *table); +struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table); struct ctl_table_header *register_sysctl_table(struct ctl_table * table); struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, struct ctl_table *table); -- cgit v1.2.3-70-g09d2