From 073219e995b4a3f8cf1ce8228b7ef440b6994ac0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 8 Feb 2014 10:36:58 -0500 Subject: cgroup: clean up cgroup_subsys names and initialization cgroup_subsys is a bit messier than it needs to be. * The name of a subsys can be different from its internal identifier defined in cgroup_subsys.h. Most subsystems use the matching name but three - cpu, memory and perf_event - use different ones. * cgroup_subsys_id enums are postfixed with _subsys_id and each cgroup_subsys is postfixed with _subsys. cgroup.h is widely included throughout various subsystems, it doesn't and shouldn't have claim on such generic names which don't have any qualifier indicating that they belong to cgroup. * cgroup_subsys->subsys_id should always equal the matching cgroup_subsys_id enum; however, we require each controller to initialize it and then BUG if they don't match, which is a bit silly. This patch cleans up cgroup_subsys names and initialization by doing the followings. * cgroup_subsys_id enums are now postfixed with _cgrp_id, and each cgroup_subsys with _cgrp_subsys. * With the above, renaming subsys identifiers to match the userland visible names doesn't cause any naming conflicts. All non-matching identifiers are renamed to match the official names. cpu_cgroup -> cpu mem_cgroup -> memory perf -> perf_event * controllers no longer need to initialize ->subsys_id and ->name. They're generated in cgroup core and set automatically during boot. * Redundant cgroup_subsys declarations removed. * While updating BUG_ON()s in cgroup_init_early(), convert them to WARN()s. BUGging that early during boot is stupid - the kernel can't print anything, even through serial console and the trap handler doesn't even link stack frame properly for back-tracing. This patch doesn't introduce any behavior changes. v2: Rebased on top of fe1217c4f3f7 ("net: net_cls: move cgroupfs classid handling into core"). Signed-off-by: Tejun Heo Acked-by: Neil Horman Acked-by: "David S. Miller" Acked-by: "Rafael J. Wysocki" Acked-by: Michal Hocko Acked-by: Peter Zijlstra Acked-by: Aristeu Rozanski Acked-by: Ingo Molnar Acked-by: Li Zefan Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Serge E. Hallyn Cc: Vivek Goyal Cc: Thomas Graf --- kernel/cpuset.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4410ac6a55f..2d018c795fe 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -119,7 +119,7 @@ static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) /* Retrieve the cpuset for a task */ static inline struct cpuset *task_cs(struct task_struct *task) { - return css_cs(task_css(task, cpuset_subsys_id)); + return css_cs(task_css(task, cpuset_cgrp_id)); } static inline struct cpuset *parent_cs(struct cpuset *cs) @@ -1521,7 +1521,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, struct task_struct *task; struct task_struct *leader = cgroup_taskset_first(tset); struct cgroup_subsys_state *oldcss = cgroup_taskset_cur_css(tset, - cpuset_subsys_id); + cpuset_cgrp_id); struct cpuset *cs = css_cs(css); struct cpuset *oldcs = css_cs(oldcss); struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); @@ -2024,8 +2024,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) kfree(cs); } -struct cgroup_subsys cpuset_subsys = { - .name = "cpuset", +struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, .css_offline = cpuset_css_offline, @@ -2033,7 +2032,6 @@ struct cgroup_subsys cpuset_subsys = { .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, - .subsys_id = cpuset_subsys_id, .base_cftypes = files, .early_init = 1, }; @@ -2699,7 +2697,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v) goto out_free; rcu_read_lock(); - css = task_css(tsk, cpuset_subsys_id); + css = task_css(tsk, cpuset_cgrp_id); retval = cgroup_path(css->cgroup, buf, PAGE_SIZE); rcu_read_unlock(); if (retval < 0) -- cgit v1.2.3-70-g09d2 From e61734c55c24cdf11b07e52a74aec4dc4a7f4bd0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Feb 2014 09:29:50 -0500 Subject: cgroup: remove cgroup->name cgroup->name handling became quite complicated over time involving dedicated struct cgroup_name for RCU protection. Now that cgroup is on kernfs, we can drop all of it and simply use kernfs_name/path() and friends. Replace cgroup->name and all related code with kernfs name/path constructs. * Reimplement cgroup_name() and cgroup_path() as thin wrappers on top of kernfs counterparts, which involves semantic changes. pr_cont_cgroup_name() and pr_cont_cgroup_path() added. * cgroup->name handling dropped from cgroup_rename(). * All users of cgroup_name/path() updated to the new semantics. Users which were formatting the string just to printk them are converted to use pr_cont_cgroup_name/path() instead, which simplifies things quite a bit. As cgroup_name() no longer requires RCU read lock around it, RCU lockings which were protecting only cgroup_name() are removed. v2: Comment above oom_info_lock updated as suggested by Michal. v3: dummy_top doesn't have a kn associated and pr_cont_cgroup_name/path() ended up calling the matching kernfs functions with NULL kn leading to oops. Test for NULL kn and print "/" if so. This issue was reported by Fengguang Wu. v4: Rebased on top of 0ab02ca8f887 ("cgroup: protect modifications to cgroup_idr with cgroup_mutex"). Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Acked-by: Michal Hocko Acked-by: Li Zefan Cc: Fengguang Wu Cc: Ingo Molnar Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- block/blk-cgroup.h | 12 ++-- fs/kernfs/dir.c | 1 + include/linux/cgroup.h | 63 ++++++++++++--------- kernel/cgroup.c | 146 +++++++++++-------------------------------------- kernel/cpuset.c | 27 +++++---- kernel/sched/debug.c | 3 +- mm/memcontrol.c | 68 ++++++----------------- 7 files changed, 110 insertions(+), 210 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 453b528c8e1..15a8d640de5 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -241,12 +241,16 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) */ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { - int ret; + char *p; - ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); - if (ret) + p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); + if (!p) { strncpy(buf, "", buflen); - return ret; + return -ENAMETOOLONG; + } + + memmove(buf, p, buf + buflen - p); + return 0; } /** diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a347792c2e5..939684ebff1 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -112,6 +112,7 @@ char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) spin_unlock_irqrestore(&kernfs_rename_lock, flags); return p; } +EXPORT_SYMBOL_GPL(kernfs_path); /** * pr_cont_kernfs_name - pr_cont name of a kernfs_node diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b42251a2312..4d6ff7d40cf 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -138,11 +138,6 @@ enum { CGRP_SANE_BEHAVIOR, }; -struct cgroup_name { - struct rcu_head rcu_head; - char name[]; -}; - struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -179,19 +174,6 @@ struct cgroup { */ u64 serial_nr; - /* - * This is a copy of dentry->d_name, and it's needed because - * we can't use dentry->d_name in cgroup_path(). - * - * You must acquire rcu_read_lock() to access cgrp->name, and - * the only place that can change it is rename(), which is - * protected by parent dir's i_mutex. - * - * Normally you should use cgroup_name() wrapper rather than - * access it directly. - */ - struct cgroup_name __rcu *name; - /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; @@ -479,12 +461,6 @@ static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; } -/* Caller should hold rcu_read_lock() */ -static inline const char *cgroup_name(const struct cgroup *cgrp) -{ - return rcu_dereference(cgrp->name)->name; -} - /* returns ino associated with a cgroup, 0 indicates unmounted root */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { @@ -503,14 +479,47 @@ static inline struct cftype *seq_cft(struct seq_file *seq) struct cgroup_subsys_state *seq_css(struct seq_file *seq); +/* + * Name / path handling functions. All are thin wrappers around the kernfs + * counterparts and can be called under any context. + */ + +static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) +{ + return kernfs_name(cgrp->kn, buf, buflen); +} + +static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, + size_t buflen) +{ + return kernfs_path(cgrp->kn, buf, buflen); +} + +static inline void pr_cont_cgroup_name(struct cgroup *cgrp) +{ + /* dummy_top doesn't have a kn associated */ + if (cgrp->kn) + pr_cont_kernfs_name(cgrp->kn); + else + pr_cont("/"); +} + +static inline void pr_cont_cgroup_path(struct cgroup *cgrp) +{ + /* dummy_top doesn't have a kn associated */ + if (cgrp->kn) + pr_cont_kernfs_path(cgrp->kn); + else + pr_cont("/"); +} + +char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); + int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); -int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); -int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); - int cgroup_task_count(const struct cgroup *cgrp); /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 59dfb025f1a..638df032fb9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -145,8 +145,6 @@ static int cgroup_root_count; /* hierarchy ID allocation and mapping, protected by cgroup_mutex */ static DEFINE_IDR(cgroup_hierarchy_idr); -static struct cgroup_name root_cgroup_name = { .name = "/" }; - /* * Assign a monotonically increasing serial number to cgroups. It * guarantees cgroups with bigger numbers are newer than those with smaller @@ -888,17 +886,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask); static struct kernfs_syscall_ops cgroup_kf_syscall_ops; static const struct file_operations proc_cgroupstats_operations; -static struct cgroup_name *cgroup_alloc_name(const char *name_str) -{ - struct cgroup_name *name; - - name = kmalloc(sizeof(*name) + strlen(name_str) + 1, GFP_KERNEL); - if (!name) - return NULL; - strcpy(name->name, name_str); - return name; -} - static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, char *buf) { @@ -958,8 +945,6 @@ static void cgroup_free_fn(struct work_struct *work) cgroup_pidlist_destroy_all(cgrp); kernfs_put(cgrp->kn); - - kfree(rcu_dereference_raw(cgrp->name)); kfree(cgrp); } @@ -1377,7 +1362,6 @@ static void init_cgroup_root(struct cgroupfs_root *root) INIT_LIST_HEAD(&root->root_list); root->number_of_cgroups = 1; cgrp->root = root; - RCU_INIT_POINTER(cgrp->name, &root_cgroup_name); init_cgroup_housekeeping(cgrp); idr_init(&root->cgroup_idr); } @@ -1597,57 +1581,6 @@ static struct file_system_type cgroup_fs_type = { static struct kobject *cgroup_kobj; -/** - * cgroup_path - generate the path of a cgroup - * @cgrp: the cgroup in question - * @buf: the buffer to write the path into - * @buflen: the length of the buffer - * - * Writes path of cgroup into buf. Returns 0 on success, -errno on error. - * - * We can't generate cgroup path using dentry->d_name, as accessing - * dentry->name must be protected by irq-unsafe dentry->d_lock or parent - * inode's i_mutex, while on the other hand cgroup_path() can be called - * with some irq-safe spinlocks held. - */ -int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) -{ - int ret = -ENAMETOOLONG; - char *start; - - if (!cgrp->parent) { - if (strlcpy(buf, "/", buflen) >= buflen) - return -ENAMETOOLONG; - return 0; - } - - start = buf + buflen - 1; - *start = '\0'; - - rcu_read_lock(); - do { - const char *name = cgroup_name(cgrp); - int len; - - len = strlen(name); - if ((start -= len) < buf) - goto out; - memcpy(start, name, len); - - if (--start < buf) - goto out; - *start = '/'; - - cgrp = cgrp->parent; - } while (cgrp->parent); - ret = 0; - memmove(buf, start, buf + buflen - start); -out: - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(cgroup_path); - /** * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * @task: target task @@ -1659,16 +1592,14 @@ EXPORT_SYMBOL_GPL(cgroup_path); * function grabs cgroup_mutex and shouldn't be used inside locks used by * cgroup controller callbacks. * - * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short. + * Return value is the same as kernfs_path(). */ -int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) +char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) { struct cgroupfs_root *root; struct cgroup *cgrp; - int hierarchy_id = 1, ret = 0; - - if (buflen < 2) - return -ENAMETOOLONG; + int hierarchy_id = 1; + char *path = NULL; mutex_lock(&cgroup_mutex); @@ -1676,14 +1607,15 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) if (root) { cgrp = task_cgroup_from_root(task, root); - ret = cgroup_path(cgrp, buf, buflen); + path = cgroup_path(cgrp, buf, buflen); } else { /* if no hierarchy exists, everyone is in "/" */ - memcpy(buf, "/", 2); + if (strlcpy(buf, "/", buflen) < buflen) + path = buf; } mutex_unlock(&cgroup_mutex); - return ret; + return path; } EXPORT_SYMBOL_GPL(task_cgroup_path); @@ -2211,7 +2143,6 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name_str) { struct cgroup *cgrp = kn->priv; - struct cgroup_name *name, *old_name; int ret; if (kernfs_type(kn) != KERNFS_DIR) @@ -2226,25 +2157,13 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, if (cgroup_sane_behavior(cgrp)) return -EPERM; - name = cgroup_alloc_name(new_name_str); - if (!name) - return -ENOMEM; - mutex_lock(&cgroup_tree_mutex); mutex_lock(&cgroup_mutex); ret = kernfs_rename(kn, new_parent, new_name_str); - if (!ret) { - old_name = rcu_dereference_protected(cgrp->name, true); - rcu_assign_pointer(cgrp->name, name); - } else { - old_name = name; - } mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_tree_mutex); - - kfree_rcu(old_name, rcu_head); return ret; } @@ -3719,14 +3638,13 @@ err_free: /** * cgroup_create - create a cgroup * @parent: cgroup that will be parent of the new cgroup - * @name_str: name of the new cgroup + * @name: name of the new cgroup * @mode: mode to set on new cgroup */ -static long cgroup_create(struct cgroup *parent, const char *name_str, +static long cgroup_create(struct cgroup *parent, const char *name, umode_t mode) { struct cgroup *cgrp; - struct cgroup_name *name; struct cgroupfs_root *root = parent->root; int ssid, err; struct cgroup_subsys *ss; @@ -3737,13 +3655,6 @@ static long cgroup_create(struct cgroup *parent, const char *name_str, if (!cgrp) return -ENOMEM; - name = cgroup_alloc_name(name_str); - if (!name) { - err = -ENOMEM; - goto err_free_cgrp; - } - rcu_assign_pointer(cgrp->name, name); - mutex_lock(&cgroup_tree_mutex); /* @@ -3781,7 +3692,7 @@ static long cgroup_create(struct cgroup *parent, const char *name_str, set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); /* create the directory */ - kn = kernfs_create_dir(parent->kn, name->name, mode, cgrp); + kn = kernfs_create_dir(parent->kn, name, mode, cgrp); if (IS_ERR(kn)) { err = PTR_ERR(kn); goto err_free_id; @@ -3839,8 +3750,6 @@ err_unlock: mutex_unlock(&cgroup_mutex); err_unlock_tree: mutex_unlock(&cgroup_tree_mutex); - kfree(rcu_dereference_raw(cgrp->name)); -err_free_cgrp: kfree(cgrp); return err; @@ -4304,12 +4213,12 @@ int proc_cgroup_show(struct seq_file *m, void *v) { struct pid *pid; struct task_struct *tsk; - char *buf; + char *buf, *path; int retval; struct cgroupfs_root *root; retval = -ENOMEM; - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) goto out; @@ -4337,10 +4246,12 @@ int proc_cgroup_show(struct seq_file *m, void *v) root->name); seq_putc(m, ':'); cgrp = task_cgroup_from_root(tsk, root); - retval = cgroup_path(cgrp, buf, PAGE_SIZE); - if (retval < 0) + path = cgroup_path(cgrp, buf, PATH_MAX); + if (!path) { + retval = -ENAMETOOLONG; goto out_unlock; - seq_puts(m, buf); + } + seq_puts(m, path); seq_putc(m, '\n'); } @@ -4588,16 +4499,17 @@ static void cgroup_release_agent(struct work_struct *work) while (!list_empty(&release_list)) { char *argv[3], *envp[3]; int i; - char *pathbuf = NULL, *agentbuf = NULL; + char *pathbuf = NULL, *agentbuf = NULL, *path; struct cgroup *cgrp = list_entry(release_list.next, struct cgroup, release_list); list_del_init(&cgrp->release_list); raw_spin_unlock(&release_list_lock); - pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); if (!pathbuf) goto continue_free; - if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) + path = cgroup_path(cgrp, pathbuf, PATH_MAX); + if (!path) goto continue_free; agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); if (!agentbuf) @@ -4605,7 +4517,7 @@ static void cgroup_release_agent(struct work_struct *work) i = 0; argv[i++] = agentbuf; - argv[i++] = pathbuf; + argv[i++] = path; argv[i] = NULL; i = 0; @@ -4755,6 +4667,11 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) { struct cgrp_cset_link *link; struct css_set *cset; + char *name_buf; + + name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!name_buf) + return -ENOMEM; read_lock(&css_set_lock); rcu_read_lock(); @@ -4763,14 +4680,17 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) struct cgroup *c = link->cgrp; const char *name = "?"; - if (c != cgroup_dummy_top) - name = cgroup_name(c); + if (c != cgroup_dummy_top) { + cgroup_name(c, name_buf, NAME_MAX + 1); + name = name_buf; + } seq_printf(seq, "Root %d group %s\n", c->root->hierarchy_id, name); } rcu_read_unlock(); read_unlock(&css_set_lock); + kfree(name_buf); return 0; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2d018c795fe..e97a6e88d03 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2088,10 +2088,9 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) parent = parent_cs(parent); if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { - rcu_read_lock(); - printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n", - cgroup_name(cs->css.cgroup)); - rcu_read_unlock(); + printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset "); + pr_cont_cgroup_name(cs->css.cgroup); + pr_cont("\n"); } } @@ -2619,19 +2618,17 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk) /* Statically allocated to prevent using excess stack. */ static char cpuset_nodelist[CPUSET_NODELIST_LEN]; static DEFINE_SPINLOCK(cpuset_buffer_lock); - struct cgroup *cgrp = task_cs(tsk)->css.cgroup; - rcu_read_lock(); spin_lock(&cpuset_buffer_lock); nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, tsk->mems_allowed); - printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", - tsk->comm, cgroup_name(cgrp), cpuset_nodelist); + printk(KERN_INFO "%s cpuset=", tsk->comm); + pr_cont_cgroup_name(cgrp); + pr_cont(" mems_allowed=%s\n", cpuset_nodelist); spin_unlock(&cpuset_buffer_lock); - rcu_read_unlock(); } /* @@ -2681,12 +2678,12 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v) { struct pid *pid; struct task_struct *tsk; - char *buf; + char *buf, *p; struct cgroup_subsys_state *css; int retval; retval = -ENOMEM; - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) goto out; @@ -2696,14 +2693,16 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v) if (!tsk) goto out_free; + retval = -ENAMETOOLONG; rcu_read_lock(); css = task_css(tsk, cpuset_cgrp_id); - retval = cgroup_path(css->cgroup, buf, PAGE_SIZE); + p = cgroup_path(css->cgroup, buf, PATH_MAX); rcu_read_unlock(); - if (retval < 0) + if (!p) goto out_put_task; - seq_puts(m, buf); + seq_puts(m, p); seq_putc(m, '\n'); + retval = 0; out_put_task: put_task_struct(tsk); out_free: diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index dd52e7ffb10..30eee3b5293 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -111,8 +111,7 @@ static char *task_group_path(struct task_group *tg) if (autogroup_path(tg, group_path, PATH_MAX)) return group_path; - cgroup_path(tg->css.cgroup, group_path, PATH_MAX); - return group_path; + return cgroup_path(tg->css.cgroup, group_path, PATH_MAX); } #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 102ab48ffa1..c1c25494f7a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1683,15 +1683,8 @@ static void move_unlock_mem_cgroup(struct mem_cgroup *memcg, */ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { - /* - * protects memcg_name and makes sure that parallel ooms do not - * interleave - */ + /* oom_info_lock ensures that parallel ooms do not interleave */ static DEFINE_SPINLOCK(oom_info_lock); - struct cgroup *task_cgrp; - struct cgroup *mem_cgrp; - static char memcg_name[PATH_MAX]; - int ret; struct mem_cgroup *iter; unsigned int i; @@ -1701,36 +1694,14 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) spin_lock(&oom_info_lock); rcu_read_lock(); - mem_cgrp = memcg->css.cgroup; - task_cgrp = task_cgroup(p, memory_cgrp_id); + pr_info("Task in "); + pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); + pr_info(" killed as a result of limit of "); + pr_cont_cgroup_path(memcg->css.cgroup); + pr_info("\n"); - ret = cgroup_path(task_cgrp, memcg_name, PATH_MAX); - if (ret < 0) { - /* - * Unfortunately, we are unable to convert to a useful name - * But we'll still print out the usage information - */ - rcu_read_unlock(); - goto done; - } rcu_read_unlock(); - pr_info("Task in %s killed", memcg_name); - - rcu_read_lock(); - ret = cgroup_path(mem_cgrp, memcg_name, PATH_MAX); - if (ret < 0) { - rcu_read_unlock(); - goto done; - } - rcu_read_unlock(); - - /* - * Continues from above, so we don't need an KERN_ level - */ - pr_cont(" as a result of limit of %s\n", memcg_name); -done: - pr_info("memory: usage %llukB, limit %llukB, failcnt %llu\n", res_counter_read_u64(&memcg->res, RES_USAGE) >> 10, res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10, @@ -1745,13 +1716,8 @@ done: res_counter_read_u64(&memcg->kmem, RES_FAILCNT)); for_each_mem_cgroup_tree(iter, memcg) { - pr_info("Memory cgroup stats"); - - rcu_read_lock(); - ret = cgroup_path(iter->css.cgroup, memcg_name, PATH_MAX); - if (!ret) - pr_cont(" for %s", memcg_name); - rcu_read_unlock(); + pr_info("Memory cgroup stats for "); + pr_cont_cgroup_path(iter->css.cgroup); pr_cont(":"); for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { @@ -3401,7 +3367,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, struct kmem_cache *s) { struct kmem_cache *new = NULL; - static char *tmp_name = NULL; + static char *tmp_path = NULL, *tmp_name = NULL; static DEFINE_MUTEX(mutex); /* protects tmp_name */ BUG_ON(!memcg_can_account_kmem(memcg)); @@ -3413,18 +3379,20 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, * This static temporary buffer is used to prevent from * pointless shortliving allocation. */ - if (!tmp_name) { - tmp_name = kmalloc(PATH_MAX, GFP_KERNEL); + if (!tmp_path || !tmp_name) { + if (!tmp_path) + tmp_path = kmalloc(PATH_MAX, GFP_KERNEL); if (!tmp_name) + tmp_name = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmp_path || !tmp_name) goto out; } - rcu_read_lock(); - snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name, - memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup)); - rcu_read_unlock(); + cgroup_name(memcg->css.cgroup, tmp_name, NAME_MAX + 1); + snprintf(tmp_path, PATH_MAX, "%s(%d:%s)", s->name, + memcg_cache_id(memcg), tmp_name); - new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align, + new = kmem_cache_create_memcg(memcg, tmp_path, s->object_size, s->align, (s->flags & ~SLAB_PANIC), s->ctor, s); if (new) new->allocflags |= __GFP_KMEMCG; -- cgit v1.2.3-70-g09d2 From 07bc356ed2950048d33d667e933e1b913c6e6b6d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Feb 2014 06:58:39 -0500 Subject: cgroup: implement cgroup_has_tasks() and unexport cgroup_task_count() cgroup_task_count() read-locks css_set_lock and walks all tasks to count them and then returns the result. The only thing all the users want is determining whether the cgroup is empty or not. This patch implements cgroup_has_tasks() which tests whether cgroup->cset_links is empty, replaces all cgroup_task_count() usages and unexports it. Note that the test isn't synchronized. This is the same as before. The test has always been racy. This will help planned css_set locking update. Signed-off-by: Tejun Heo Acked-by: Li Zefan Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- include/linux/cgroup.h | 8 ++++++-- kernel/cgroup.c | 2 +- kernel/cpuset.c | 2 +- mm/memcontrol.c | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e2ffcdc26cb..72154fb44fb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -457,6 +457,12 @@ static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; } +/* no synchronization, the result can only be used as a hint */ +static inline bool cgroup_has_tasks(struct cgroup *cgrp) +{ + return !list_empty(&cgrp->cset_links); +} + /* returns ino associated with a cgroup, 0 indicates unmounted root */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { @@ -516,8 +522,6 @@ int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); -int cgroup_task_count(const struct cgroup *cgrp); - /* * Control Group taskset, used to pass around set of tasks to cgroup_subsys * methods. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2469699408b..ec7746e5ded 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2391,7 +2391,7 @@ EXPORT_SYMBOL_GPL(cgroup_add_cftypes); * * Return the number of tasks in the cgroup. */ -int cgroup_task_count(const struct cgroup *cgrp) +static int cgroup_task_count(const struct cgroup *cgrp) { int count = 0; struct cgrp_cset_link *link; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e97a6e88d03..ae190b0a196 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -467,7 +467,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) * be changed to have empty cpus_allowed or mems_allowed. */ ret = -ENOSPC; - if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress)) { + if ((cgroup_has_tasks(cur->css.cgroup) || cur->attach_in_progress)) { if (!cpumask_empty(cur->cpus_allowed) && cpumask_empty(trial->cpus_allowed)) goto out; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c1c25494f7a..d9c6ac1532e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4958,7 +4958,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) struct cgroup *cgrp = memcg->css.cgroup; /* returns EBUSY if there is a task or if we come here twice. */ - if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) + if (cgroup_has_tasks(cgrp) || !list_empty(&cgrp->children)) return -EBUSY; /* we call try-to-free pages for make this cgroup empty */ @@ -5140,7 +5140,7 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg, * of course permitted. */ mutex_lock(&memcg_create_mutex); - if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg)) + if (cgroup_has_tasks(memcg->css.cgroup) || memcg_has_children(memcg)) err = -EBUSY; mutex_unlock(&memcg_create_mutex); if (err) -- cgit v1.2.3-70-g09d2 From d66393e54e0a9dc743e440eb36c58bd1158a560e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Feb 2014 06:58:40 -0500 Subject: cpuset: use css_task_iter_start/next/end() instead of css_scan_tasks() Now that css_task_iter_start/next_end() supports blocking while iterating, there's no reason to use css_scan_tasks() which is more cumbersome to use and scheduled to be removed. Convert all css_scan_tasks() usages in cpuset to css_task_iter_start/next/end(). This simplifies the code by removing heap allocation and callbacks. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- kernel/cpuset.c | 186 ++++++++++++++++++-------------------------------------- 1 file changed, 58 insertions(+), 128 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ae190b0a196..65ae0bdf4af 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -828,56 +828,37 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs) return cs; } -/** - * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's - * @tsk: task to test - * @data: cpuset to @tsk belongs to - * - * Called by css_scan_tasks() for each task in a cgroup whose cpus_allowed - * mask needs to be changed. - * - * We don't need to re-check for the cgroup/cpuset membership, since we're - * holding cpuset_mutex at this point. - */ -static void cpuset_change_cpumask(struct task_struct *tsk, void *data) -{ - struct cpuset *cs = data; - struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); - - set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed); -} - /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed - * @heap: if NULL, defer allocating heap memory to css_scan_tasks() - * - * Called with cpuset_mutex held - * - * The css_scan_tasks() function will scan all the tasks in a cgroup, - * calling callback functions for each. * - * No return value. It's guaranteed that css_scan_tasks() always returns 0 - * if @heap != NULL. + * Iterate through each task of @cs updating its cpus_allowed to the + * effective cpuset's. As this function is called with cpuset_mutex held, + * cpuset membership stays stable. */ -static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) +static void update_tasks_cpumask(struct cpuset *cs) { - css_scan_tasks(&cs->css, NULL, cpuset_change_cpumask, cs, heap); + struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); + struct css_task_iter it; + struct task_struct *task; + + css_task_iter_start(&cs->css, &it); + while ((task = css_task_iter_next(&it))) + set_cpus_allowed_ptr(task, cpus_cs->cpus_allowed); + css_task_iter_end(&it); } /* * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy. * @root_cs: the root cpuset of the hierarchy * @update_root: update root cpuset or not? - * @heap: the heap used by css_scan_tasks() * * This will update cpumasks of tasks in @root_cs and all other empty cpusets * which take on cpumask of @root_cs. * * Called with cpuset_mutex held */ -static void update_tasks_cpumask_hier(struct cpuset *root_cs, - bool update_root, struct ptr_heap *heap) +static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; @@ -898,7 +879,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, continue; rcu_read_unlock(); - update_tasks_cpumask(cp, heap); + update_tasks_cpumask(cp); rcu_read_lock(); css_put(&cp->css); @@ -914,7 +895,6 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { - struct ptr_heap heap; int retval; int is_load_balanced; @@ -947,19 +927,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) return retval; - retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); - if (retval) - return retval; - is_load_balanced = is_sched_load_balance(trialcs); mutex_lock(&callback_mutex); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); mutex_unlock(&callback_mutex); - update_tasks_cpumask_hier(cs, true, &heap); - - heap_free(&heap); + update_tasks_cpumask_hier(cs, true); if (is_load_balanced) rebuild_sched_domains_locked(); @@ -1052,53 +1026,22 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, task_unlock(tsk); } -struct cpuset_change_nodemask_arg { - struct cpuset *cs; - nodemask_t *newmems; -}; - -/* - * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy - * of it to cpuset's new mems_allowed, and migrate pages to new nodes if - * memory_migrate flag is set. Called with cpuset_mutex held. - */ -static void cpuset_change_nodemask(struct task_struct *p, void *data) -{ - struct cpuset_change_nodemask_arg *arg = data; - struct cpuset *cs = arg->cs; - struct mm_struct *mm; - int migrate; - - cpuset_change_task_nodemask(p, arg->newmems); - - mm = get_task_mm(p); - if (!mm) - return; - - migrate = is_memory_migrate(cs); - - mpol_rebind_mm(mm, &cs->mems_allowed); - if (migrate) - cpuset_migrate_mm(mm, &cs->old_mems_allowed, arg->newmems); - mmput(mm); -} - static void *cpuset_being_rebound; /** * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. * @cs: the cpuset in which each task's mems_allowed mask needs to be changed - * @heap: if NULL, defer allocating heap memory to css_scan_tasks() * - * Called with cpuset_mutex held. No return value. It's guaranteed that - * css_scan_tasks() always returns 0 if @heap != NULL. + * Iterate through each task of @cs updating its mems_allowed to the + * effective cpuset's. As this function is called with cpuset_mutex held, + * cpuset membership stays stable. */ -static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) +static void update_tasks_nodemask(struct cpuset *cs) { static nodemask_t newmems; /* protected by cpuset_mutex */ struct cpuset *mems_cs = effective_nodemask_cpuset(cs); - struct cpuset_change_nodemask_arg arg = { .cs = cs, - .newmems = &newmems }; + struct css_task_iter it; + struct task_struct *task; cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ @@ -1114,7 +1057,25 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. */ - css_scan_tasks(&cs->css, NULL, cpuset_change_nodemask, &arg, heap); + css_task_iter_start(&cs->css, &it); + while ((task = css_task_iter_next(&it))) { + struct mm_struct *mm; + bool migrate; + + cpuset_change_task_nodemask(task, &newmems); + + mm = get_task_mm(task); + if (!mm) + continue; + + migrate = is_memory_migrate(cs); + + mpol_rebind_mm(mm, &cs->mems_allowed); + if (migrate) + cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); + mmput(mm); + } + css_task_iter_end(&it); /* * All the tasks' nodemasks have been updated, update @@ -1130,15 +1091,13 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap) * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy. * @cs: the root cpuset of the hierarchy * @update_root: update the root cpuset or not? - * @heap: the heap used by css_scan_tasks() * * This will update nodemasks of tasks in @root_cs and all other empty cpusets * which take on nodemask of @root_cs. * * Called with cpuset_mutex held */ -static void update_tasks_nodemask_hier(struct cpuset *root_cs, - bool update_root, struct ptr_heap *heap) +static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root) { struct cpuset *cp; struct cgroup_subsys_state *pos_css; @@ -1159,7 +1118,7 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, continue; rcu_read_unlock(); - update_tasks_nodemask(cp, heap); + update_tasks_nodemask(cp); rcu_read_lock(); css_put(&cp->css); @@ -1184,7 +1143,6 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, const char *buf) { int retval; - struct ptr_heap heap; /* * top_cpuset.mems_allowed tracks node_stats[N_MEMORY]; @@ -1223,17 +1181,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) goto done; - retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); - if (retval < 0) - goto done; - mutex_lock(&callback_mutex); cs->mems_allowed = trialcs->mems_allowed; mutex_unlock(&callback_mutex); - update_tasks_nodemask_hier(cs, true, &heap); - - heap_free(&heap); + update_tasks_nodemask_hier(cs, true); done: return retval; } @@ -1260,39 +1212,23 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) return 0; } -/** - * cpuset_change_flag - make a task's spread flags the same as its cpuset's - * @tsk: task to be updated - * @data: cpuset to @tsk belongs to - * - * Called by css_scan_tasks() for each task in a cgroup. - * - * We don't need to re-check for the cgroup/cpuset membership, since we're - * holding cpuset_mutex at this point. - */ -static void cpuset_change_flag(struct task_struct *tsk, void *data) -{ - struct cpuset *cs = data; - - cpuset_update_task_spread_flag(cs, tsk); -} - /** * update_tasks_flags - update the spread flags of tasks in the cpuset. * @cs: the cpuset in which each task's spread flags needs to be changed - * @heap: if NULL, defer allocating heap memory to css_scan_tasks() - * - * Called with cpuset_mutex held * - * The css_scan_tasks() function will scan all the tasks in a cgroup, - * calling callback functions for each. - * - * No return value. It's guaranteed that css_scan_tasks() always returns 0 - * if @heap != NULL. + * Iterate through each task of @cs updating its spread flags. As this + * function is called with cpuset_mutex held, cpuset membership stays + * stable. */ -static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap) +static void update_tasks_flags(struct cpuset *cs) { - css_scan_tasks(&cs->css, NULL, cpuset_change_flag, cs, heap); + struct css_task_iter it; + struct task_struct *task; + + css_task_iter_start(&cs->css, &it); + while ((task = css_task_iter_next(&it))) + cpuset_update_task_spread_flag(cs, task); + css_task_iter_end(&it); } /* @@ -1310,7 +1246,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, struct cpuset *trialcs; int balance_flag_changed; int spread_flag_changed; - struct ptr_heap heap; int err; trialcs = alloc_trial_cpuset(cs); @@ -1326,10 +1261,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, if (err < 0) goto out; - err = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); - if (err < 0) - goto out; - balance_flag_changed = (is_sched_load_balance(cs) != is_sched_load_balance(trialcs)); @@ -1344,8 +1275,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, rebuild_sched_domains_locked(); if (spread_flag_changed) - update_tasks_flags(cs, &heap); - heap_free(&heap); + update_tasks_flags(cs); out: free_trial_cpuset(trialcs); return err; @@ -2138,7 +2068,7 @@ retry: */ if ((sane && cpumask_empty(cs->cpus_allowed)) || (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) - update_tasks_cpumask(cs, NULL); + update_tasks_cpumask(cs); mutex_lock(&callback_mutex); nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems); @@ -2152,7 +2082,7 @@ retry: */ if ((sane && nodes_empty(cs->mems_allowed)) || (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) - update_tasks_nodemask(cs, NULL); + update_tasks_nodemask(cs); is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); @@ -2214,7 +2144,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) mutex_lock(&callback_mutex); top_cpuset.mems_allowed = new_mems; mutex_unlock(&callback_mutex); - update_tasks_nodemask(&top_cpuset, NULL); + update_tasks_nodemask(&top_cpuset); } mutex_unlock(&cpuset_mutex); -- cgit v1.2.3-70-g09d2 From 924f0d9a2078f49ff331bb43196ec5afadc16b8f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Feb 2014 06:58:41 -0500 Subject: cgroup: drop @skip_css from cgroup_taskset_for_each() If !NULL, @skip_css makes cgroup_taskset_for_each() skip the matching css. The intention of the interface is to make it easy to skip css's (cgroup_subsys_states) which already match the migration target; however, this is entirely unnecessary as migration taskset doesn't include tasks which are already in the target cgroup. Drop @skip_css from cgroup_taskset_for_each(). Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann --- block/blk-cgroup.c | 2 +- include/linux/cgroup.h | 8 ++------ kernel/cgroup_freezer.c | 2 +- kernel/cpuset.c | 4 ++-- kernel/events/core.c | 2 +- kernel/sched/core.c | 4 ++-- net/core/netclassid_cgroup.c | 2 +- net/core/netprio_cgroup.c | 2 +- 8 files changed, 11 insertions(+), 15 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1cef07cf9c2..4aefd46d7d9 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -894,7 +894,7 @@ static int blkcg_can_attach(struct cgroup_subsys_state *css, int ret = 0; /* task_lock() is needed to avoid races with exit_io_context() */ - cgroup_taskset_for_each(task, css, tset) { + cgroup_taskset_for_each(task, tset) { task_lock(task); ioc = task->io_context; if (ioc && atomic_read(&ioc->nr_tasks) > 1) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3bd0a713837..581a124c7bc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -535,15 +535,11 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor - * @skip_css: skip if task's css matches this, %NULL to iterate through all * @tset: taskset to iterate */ -#define cgroup_taskset_for_each(task, skip_css, tset) \ +#define cgroup_taskset_for_each(task, tset) \ for ((task) = cgroup_taskset_first((tset)); (task); \ - (task) = cgroup_taskset_next((tset))) \ - if (!(skip_css) || \ - cgroup_taskset_cur_css((tset), \ - (skip_css)->ss->id) != (skip_css)) + (task) = cgroup_taskset_next((tset))) /* * Control Group subsystem type. diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 98ea26a9907..7201a637c40 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -187,7 +187,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, * current state before executing the following - !frozen tasks may * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. */ - cgroup_taskset_for_each(task, new_css, tset) { + cgroup_taskset_for_each(task, tset) { if (!(freezer->state & CGROUP_FREEZING)) { __thaw_task(task); } else { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 65ae0bdf4af..bf20e4ac2f7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1398,7 +1398,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) goto out_unlock; - cgroup_taskset_for_each(task, css, tset) { + cgroup_taskset_for_each(task, tset) { /* * Kthreads which disallow setaffinity shouldn't be moved * to a new cpuset; we don't want to change their cpu @@ -1467,7 +1467,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to); - cgroup_taskset_for_each(task, css, tset) { + cgroup_taskset_for_each(task, tset) { /* * can_attach beforehand should guarantee that this doesn't * fail. TODO: have a better way to handle failure here diff --git a/kernel/events/core.c b/kernel/events/core.c index a3c3ab50271..6dd714955b0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8021,7 +8021,7 @@ static void perf_cgroup_attach(struct cgroup_subsys_state *css, { struct task_struct *task; - cgroup_taskset_for_each(task, css, tset) + cgroup_taskset_for_each(task, tset) task_function_call(task, __perf_cgroup_move, task); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d4cfc556183..ba386a06ab1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7600,7 +7600,7 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, { struct task_struct *task; - cgroup_taskset_for_each(task, css, tset) { + cgroup_taskset_for_each(task, tset) { #ifdef CONFIG_RT_GROUP_SCHED if (!sched_rt_can_attach(css_tg(css), task)) return -EINVAL; @@ -7618,7 +7618,7 @@ static void cpu_cgroup_attach(struct cgroup_subsys_state *css, { struct task_struct *task; - cgroup_taskset_for_each(task, css, tset) + cgroup_taskset_for_each(task, tset) sched_move_task(task); } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index b865662fba7..22931e1b99b 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -73,7 +73,7 @@ static void cgrp_attach(struct cgroup_subsys_state *css, void *v = (void *)(unsigned long)cs->classid; struct task_struct *p; - cgroup_taskset_for_each(p, css, tset) { + cgroup_taskset_for_each(p, tset) { task_lock(p); iterate_fd(p->files, 0, update_classid, v); task_unlock(p); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index d7d23e28faf..f9f3a40d335 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -224,7 +224,7 @@ static void net_prio_attach(struct cgroup_subsys_state *css, struct task_struct *p; void *v = (void *)(unsigned long)css->cgroup->id; - cgroup_taskset_for_each(p, css, tset) { + cgroup_taskset_for_each(p, tset) { task_lock(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); -- cgit v1.2.3-70-g09d2 From 57fce0a68e3aa71d223d9023aae66c7393970c34 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Feb 2014 06:58:41 -0500 Subject: cpuset: don't use cgroup_taskset_cur_css() cgroup_taskset_cur_css() will be removed during the planned resturcturing of migration path. The only use of cgroup_taskset_cur_css() is finding out the old cgroup_subsys_state of the leader in cpuset_attach(). This usage can easily be removed by remembering the old value from cpuset_can_attach(). Signed-off-by: Tejun Heo Acked-by: Li Zefan --- kernel/cpuset.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index bf20e4ac2f7..d8bec21d7a1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1379,6 +1379,8 @@ static int fmeter_getrate(struct fmeter *fmp) return val; } +static struct cpuset *cpuset_attach_old_cs; + /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ static int cpuset_can_attach(struct cgroup_subsys_state *css, struct cgroup_taskset *tset) @@ -1387,6 +1389,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, struct task_struct *task; int ret; + /* used later by cpuset_attach() */ + cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset)); + mutex_lock(&cpuset_mutex); /* @@ -1450,10 +1455,8 @@ static void cpuset_attach(struct cgroup_subsys_state *css, struct mm_struct *mm; struct task_struct *task; struct task_struct *leader = cgroup_taskset_first(tset); - struct cgroup_subsys_state *oldcss = cgroup_taskset_cur_css(tset, - cpuset_cgrp_id); struct cpuset *cs = css_cs(css); - struct cpuset *oldcs = css_cs(oldcss); + struct cpuset *oldcs = cpuset_attach_old_cs; struct cpuset *cpus_cs = effective_cpumask_cpuset(cs); struct cpuset *mems_cs = effective_nodemask_cpuset(cs); -- cgit v1.2.3-70-g09d2 From b8dadcb58d542ecbf1d3dae5fefcd3fd8cb26539 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 3 Mar 2014 17:28:36 -0500 Subject: cpuset: use rcu_read_lock() to protect task_cs() We no longer use task_lock() to protect tsk->cgroups. Reported-by: Fengguang Wu Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- kernel/cpuset.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d8bec21d7a1..8d5324583aa 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2239,10 +2239,10 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) struct cpuset *cpus_cs; mutex_lock(&callback_mutex); - task_lock(tsk); + rcu_read_lock(); cpus_cs = effective_cpumask_cpuset(task_cs(tsk)); guarantee_online_cpus(cpus_cs, pmask); - task_unlock(tsk); + rcu_read_unlock(); mutex_unlock(&callback_mutex); } @@ -2295,10 +2295,10 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) nodemask_t mask; mutex_lock(&callback_mutex); - task_lock(tsk); + rcu_read_lock(); mems_cs = effective_nodemask_cpuset(task_cs(tsk)); guarantee_online_mems(mems_cs, &mask); - task_unlock(tsk); + rcu_read_unlock(); mutex_unlock(&callback_mutex); return mask; @@ -2414,9 +2414,9 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) /* Not hardwall and node outside mems_allowed: scan up cpusets */ mutex_lock(&callback_mutex); - task_lock(current); + rcu_read_lock(); cs = nearest_hardwall_ancestor(task_cs(current)); - task_unlock(current); + rcu_read_unlock(); allowed = node_isset(node, cs->mems_allowed); mutex_unlock(&callback_mutex); @@ -2543,24 +2543,26 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, * @task: pointer to task_struct of some task. * * Description: Prints @task's name, cpuset name, and cached copy of its - * mems_allowed to the kernel log. Must hold task_lock(task) to allow - * dereferencing task_cs(task). + * mems_allowed to the kernel log. */ void cpuset_print_task_mems_allowed(struct task_struct *tsk) { /* Statically allocated to prevent using excess stack. */ static char cpuset_nodelist[CPUSET_NODELIST_LEN]; static DEFINE_SPINLOCK(cpuset_buffer_lock); - struct cgroup *cgrp = task_cs(tsk)->css.cgroup; + struct cgroup *cgrp; spin_lock(&cpuset_buffer_lock); + rcu_read_lock(); + cgrp = task_cs(tsk)->css.cgroup; nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, tsk->mems_allowed); printk(KERN_INFO "%s cpuset=", tsk->comm); pr_cont_cgroup_name(cgrp); pr_cont(" mems_allowed=%s\n", cpuset_nodelist); + rcu_read_unlock(); spin_unlock(&cpuset_buffer_lock); } @@ -2592,9 +2594,9 @@ int cpuset_memory_pressure_enabled __read_mostly; void __cpuset_memory_pressure_bump(void) { - task_lock(current); + rcu_read_lock(); fmeter_markevent(&task_cs(current)->fmeter); - task_unlock(current); + rcu_read_unlock(); } #ifdef CONFIG_PROC_PID_CPUSET -- cgit v1.2.3-70-g09d2 From 4d3bb511b5f9980fc3e9ae5939ebc475b231d3fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Mar 2014 10:23:54 -0400 Subject: cgroup: drop const from @buffer of cftype->write_string() cftype->write_string() just passes on the writeable buffer from kernfs and there's no reason to add const restriction on the buffer. The only thing const achieves is unnecessarily complicating parsing of the buffer. Drop const from @buffer. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Michal Hocko Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki --- block/blk-throttle.c | 4 ++-- block/cfq-iosched.c | 4 ++-- include/linux/cgroup.h | 2 +- kernel/cgroup.c | 2 +- kernel/cgroup_freezer.c | 2 +- kernel/cpuset.c | 2 +- mm/hugetlb_cgroup.c | 2 +- mm/memcontrol.c | 4 ++-- net/core/netprio_cgroup.c | 2 +- net/ipv4/tcp_memcontrol.c | 2 +- security/device_cgroup.c | 4 ++-- 11 files changed, 15 insertions(+), 15 deletions(-) (limited to 'kernel/cpuset.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 861c363e412..033745cd7fb 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1408,13 +1408,13 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, } static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buf) + char *buf) { return tg_set_conf(css, cft, buf, true); } static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buf) + char *buf) { return tg_set_conf(css, cft, buf, false); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 46118794339..f5de45b6af3 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1701,13 +1701,13 @@ static int __cfqg_set_weight_device(struct cgroup_subsys_state *css, } static int cfqg_set_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buf) + struct cftype *cft, char *buf) { return __cfqg_set_weight_device(css, cft, buf, false); } static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buf) + struct cftype *cft, char *buf) { return __cfqg_set_weight_device(css, cft, buf, true); } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 77294fc6660..79993ac066c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -454,7 +454,7 @@ struct cftype { * Returns 0 or -ve error code. */ int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer); + char *buffer); /* * trigger() callback can be used to get some kick from the * userspace, when the actual string written is not important diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 60ea16058c4..f5754910e80 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2143,7 +2143,7 @@ static int cgroup_procs_write(struct cgroup_subsys_state *css, } static int cgroup_release_agent_write(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buffer) + struct cftype *cft, char *buffer) { struct cgroup_root *root = css->cgroup->root; diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2ea98b216bf..2bc4a225644 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -442,7 +442,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) } static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer) + char *buffer) { bool freeze; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8d5324583aa..efbf9baf77e 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1610,7 +1610,7 @@ out_unlock: * Common handling for a write to a "cpus" or "mems" file. */ static int cpuset_write_resmask(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buf) + struct cftype *cft, char *buf) { struct cpuset *cs = css_cs(css); struct cpuset *trialcs; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index b135853e68f..595d7fd795e 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -254,7 +254,7 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, } static int hugetlb_cgroup_write(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buffer) + struct cftype *cft, char *buffer) { int idx, name, ret; unsigned long long val; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d9c6ac1532e..96f94a9f2fa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5242,7 +5242,7 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg, * RES_LIMIT. */ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer) + char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); enum res_type type; @@ -6063,7 +6063,7 @@ static void memcg_event_ptable_queue_proc(struct file *file, * Interpretation of args is defined by control file implementation. */ static int memcg_write_event_control(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buffer) + struct cftype *cft, char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index f9f3a40d335..3825f669147 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -186,7 +186,7 @@ static int read_priomap(struct seq_file *sf, void *v) } static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer) + char *buffer) { char devname[IFNAMSIZ + 1]; struct net_device *dev; diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 20a0aca9131..d4f015ad6c8 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -103,7 +103,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) } static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer) + char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); unsigned long long val; diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 7f88bcde7c6..8365909f5f8 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -496,7 +496,7 @@ static inline bool has_children(struct dev_cgroup *devcgroup) * parent cgroup has the access you're asking for. */ static int devcgroup_update_access(struct dev_cgroup *devcgroup, - int filetype, const char *buffer) + int filetype, char *buffer) { const char *b; char temp[12]; /* 11 + 1 characters needed for a u32 */ @@ -652,7 +652,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, } static int devcgroup_access_write(struct cgroup_subsys_state *css, - struct cftype *cft, const char *buffer) + struct cftype *cft, char *buffer) { int retval; -- cgit v1.2.3-70-g09d2