cgroup: implement cgroup_add_cftypes() and friends

Currently, cgroup directories are populated by subsys->populate() callback explicitly creating files on each cgroup creation. This level of flexibility isn't needed or desirable. It provides largely unused flexibility which call for abuses while severely limiting what the core layer can do through the lack of structure and conventions. Per each cgroup file type, the only distinction that cgroup users is making is whether a cgroup is root or not, which can easily be expressed with flags. This patch introduces cgroup_add_cftypes(). These deal with cftypes instead of individual files - controllers indicate that certain types of files exist for certain subsystem. Newly added CFTYPE_*_ON_ROOT flags indicate whether a cftype should be excluded or created only on the root cgroup. cgroup_add_cftypes() can be called any time whether the target subsystem is currently attached or not. cgroup core will create files on the existing cgroups as necessary. Also, cgroup_subsys->base_cftypes is added to ease registration of the base files for the subsystem. If non-NULL on subsys init, the cftypes pointed to by ->base_cftypes are automatically registered on subsys init / load. Further patches will convert the existing users and remove the file based interface. Note that this interface allows dynamic addition of files to an active controller. This will be used for sub-controller modularity and unified hierarchy in the longer term. This patch implements the new mechanism but doesn't apply it to any user. v2: replaced DECLARE_CGROUP_CFTYPES[_COND]() with cgroup_subsys->base_cftypes, which works better for cgroup_subsys which is loaded as module. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizf@cn.fujitsu.com>
author: Tejun Heo <tj@kernel.org> 2012-04-01 12:09:55 -0700
committer: Tejun Heo <tj@kernel.org> 2012-04-01 12:09:55 -0700
commit: 8e3f6541d45e1a002801e56a19530a90f775deba (patch)
tree: 5342d7afce759252f5204c7e33283879cca0490d /kernel/cgroup.c
parent: b0ca5a84fc3ad8f75bb2b7ac3dc6a77151cd3906 (diff)
1 files changed, 131 insertions, 1 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index db4319c030d..df8fb82ef35 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2623,8 +2623,14 @@ int cgroup_add_file(struct cgroup *cgrp,
 	struct dentry *dentry;
 	int error;
 	umode_t mode;
-
 	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
+
+	/* does @cft->flags tell us to skip creation on @cgrp? */
+	if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
+		return 0;
+	if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
+		return 0;
+
 	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
 		strcpy(name, subsys->name);
 		strcat(name, ".");
@@ -2660,6 +2666,95 @@ int cgroup_add_files(struct cgroup *cgrp,
 }
 EXPORT_SYMBOL_GPL(cgroup_add_files);
 
+static DEFINE_MUTEX(cgroup_cft_mutex);
+
+static void cgroup_cfts_prepare(void)
+	__acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex)
+{
+	/*
+	 * Thanks to the entanglement with vfs inode locking, we can't walk
+	 * the existing cgroups under cgroup_mutex and create files.
+	 * Instead, we increment reference on all cgroups and build list of
+	 * them using @cgrp->cft_q_node.  Grab cgroup_cft_mutex to ensure
+	 * exclusive access to the field.
+	 */
+	mutex_lock(&cgroup_cft_mutex);
+	mutex_lock(&cgroup_mutex);
+}
+
+static void cgroup_cfts_commit(struct cgroup_subsys *ss,
+			       const struct cftype *cfts)
+	__releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
+{
+	LIST_HEAD(pending);
+	struct cgroup *cgrp, *n;
+	int count = 0;
+
+	while (cfts[count].name[0] != '\0')
+		count++;
+
+	/* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
+	if (cfts && ss->root != &rootnode) {
+		list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
+			dget(cgrp->dentry);
+			list_add_tail(&cgrp->cft_q_node, &pending);
+		}
+	}
+
+	mutex_unlock(&cgroup_mutex);
+
+	/*
+	 * All new cgroups will see @cfts update on @ss->cftsets.  Add/rm
+	 * files for all cgroups which were created before.
+	 */
+	list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) {
+		struct inode *inode = cgrp->dentry->d_inode;
+
+		mutex_lock(&inode->i_mutex);
+		mutex_lock(&cgroup_mutex);
+		if (!cgroup_is_removed(cgrp))
+			cgroup_add_files(cgrp, ss, cfts, count);
+		mutex_unlock(&cgroup_mutex);
+		mutex_unlock(&inode->i_mutex);
+
+		list_del_init(&cgrp->cft_q_node);
+		dput(cgrp->dentry);
+	}
+
+	mutex_unlock(&cgroup_cft_mutex);
+}
+
+/**
+ * cgroup_add_cftypes - add an array of cftypes to a subsystem
+ * @ss: target cgroup subsystem
+ * @cfts: zero-length name terminated array of cftypes
+ *
+ * Register @cfts to @ss.  Files described by @cfts are created for all
+ * existing cgroups to which @ss is attached and all future cgroups will
+ * have them too.  This function can be called anytime whether @ss is
+ * attached or not.
+ *
+ * Returns 0 on successful registration, -errno on failure.  Note that this
+ * function currently returns 0 as long as @cfts registration is successful
+ * even if some file creation attempts on existing cgroups fail.
+ */
+int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts)
+{
+	struct cftype_set *set;
+
+	set = kzalloc(sizeof(*set), GFP_KERNEL);
+	if (!set)
+		return -ENOMEM;
+
+	cgroup_cfts_prepare();
+	set->cfts = cfts;
+	list_add_tail(&set->node, &ss->cftsets);
+	cgroup_cfts_commit(ss, cfts);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
+
 /**
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
@@ -3660,10 +3755,25 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
 			return err;
 	}
 
+	/* process cftsets of each subsystem */
 	for_each_subsys(cgrp->root, ss) {
+		struct cftype_set *set;
+
 		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
 			return err;
+
+		list_for_each_entry(set, &ss->cftsets, node) {
+			const struct cftype *cft;
+
+			for (cft = set->cfts; cft->name[0] != '\0'; cft++) {
+				err = cgroup_add_file(cgrp, ss, cft);
+				if (err)
+					pr_warning("cgroup_populate_dir: failed to create %s, err=%d\n",
+						   cft->name, err);
+			}
+		}
 	}
+
 	/* This cgroup is ready now */
 	for_each_subsys(cgrp->root, ss) {
 		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
@@ -4034,12 +4144,29 @@ again:
 	return 0;
 }
 
+static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
+{
+	INIT_LIST_HEAD(&ss->cftsets);
+
+	/*
+	 * base_cftset is embedded in subsys itself, no need to worry about
+	 * deregistration.
+	 */
+	if (ss->base_cftypes) {
+		ss->base_cftset.cfts = ss->base_cftypes;
+		list_add_tail(&ss->base_cftset.node, &ss->cftsets);
+	}
+}
+
 static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 {
 	struct cgroup_subsys_state *css;
 
 	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
 
+	/* init base cftset */
+	cgroup_init_cftsets(ss);
+
 	/* Create the top cgroup state for this subsystem */
 	list_add(&ss->sibling, &rootnode.subsys_list);
 	ss->root = &rootnode;
@@ -4109,6 +4236,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 		return 0;
 	}
 
+	/* init base cftset */
+	cgroup_init_cftsets(ss);
+
 	/*
 	 * need to register a subsys id before anything else - for example,
 	 * init_cgroup_css needs it.
author	Tejun Heo <tj@kernel.org>	2012-04-01 12:09:55 -0700
committer	Tejun Heo <tj@kernel.org>	2012-04-01 12:09:55 -0700
commit	8e3f6541d45e1a002801e56a19530a90f775deba (patch)
tree	5342d7afce759252f5204c7e33283879cca0490d /kernel/cgroup.c
parent	b0ca5a84fc3ad8f75bb2b7ac3dc6a77151cd3906 (diff)