summaryrefslogtreecommitdiffstats
path: root/kernel/cgroup_freezer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup_freezer.c')
-rw-r--r--kernel/cgroup_freezer.c514
1 files changed, 309 insertions, 205 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index b1724ce9898..75dda1ea502 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -22,24 +22,33 @@
#include <linux/freezer.h>
#include <linux/seq_file.h>
-enum freezer_state {
- CGROUP_THAWED = 0,
- CGROUP_FREEZING,
- CGROUP_FROZEN,
+/*
+ * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
+ * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
+ * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
+ * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
+ * its ancestors has FREEZING_SELF set.
+ */
+enum freezer_state_flags {
+ CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
+ CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
+ CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */
+ CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */
+
+ /* mask for all FREEZING flags */
+ CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
};
struct freezer {
- struct cgroup_subsys_state css;
- enum freezer_state state;
- spinlock_t lock; /* protects _writes_ to state */
+ struct cgroup_subsys_state css;
+ unsigned int state;
+ spinlock_t lock;
};
-static inline struct freezer *cgroup_freezer(
- struct cgroup *cgroup)
+static inline struct freezer *cgroup_freezer(struct cgroup *cgroup)
{
- return container_of(
- cgroup_subsys_state(cgroup, freezer_subsys_id),
- struct freezer, css);
+ return container_of(cgroup_subsys_state(cgroup, freezer_subsys_id),
+ struct freezer, css);
}
static inline struct freezer *task_freezer(struct task_struct *task)
@@ -48,14 +57,21 @@ static inline struct freezer *task_freezer(struct task_struct *task)
struct freezer, css);
}
+static struct freezer *parent_freezer(struct freezer *freezer)
+{
+ struct cgroup *pcg = freezer->css.cgroup->parent;
+
+ if (pcg)
+ return cgroup_freezer(pcg);
+ return NULL;
+}
+
bool cgroup_freezing(struct task_struct *task)
{
- enum freezer_state state;
bool ret;
rcu_read_lock();
- state = task_freezer(task)->state;
- ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN;
+ ret = task_freezer(task)->state & CGROUP_FREEZING;
rcu_read_unlock();
return ret;
@@ -65,70 +81,18 @@ bool cgroup_freezing(struct task_struct *task)
* cgroups_write_string() limits the size of freezer state strings to
* CGROUP_LOCAL_BUFFER_SIZE
*/
-static const char *freezer_state_strs[] = {
- "THAWED",
- "FREEZING",
- "FROZEN",
+static const char *freezer_state_strs(unsigned int state)
+{
+ if (state & CGROUP_FROZEN)
+ return "FROZEN";
+ if (state & CGROUP_FREEZING)
+ return "FREEZING";
+ return "THAWED";
};
-/*
- * State diagram
- * Transitions are caused by userspace writes to the freezer.state file.
- * The values in parenthesis are state labels. The rest are edge labels.
- *
- * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
- * ^ ^ | |
- * | \_______THAWED_______/ |
- * \__________________________THAWED____________/
- */
-
struct cgroup_subsys freezer_subsys;
-/* Locks taken and their ordering
- * ------------------------------
- * cgroup_mutex (AKA cgroup_lock)
- * freezer->lock
- * css_set_lock
- * task->alloc_lock (AKA task_lock)
- * task->sighand->siglock
- *
- * cgroup code forces css_set_lock to be taken before task->alloc_lock
- *
- * freezer_create(), freezer_destroy():
- * cgroup_mutex [ by cgroup core ]
- *
- * freezer_can_attach():
- * cgroup_mutex (held by caller of can_attach)
- *
- * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
- * freezer->lock
- * sighand->siglock (if the cgroup is freezing)
- *
- * freezer_read():
- * cgroup_mutex
- * freezer->lock
- * write_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock
- * read_lock css_set_lock (cgroup iterator start)
- *
- * freezer_write() (freeze):
- * cgroup_mutex
- * freezer->lock
- * write_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock
- * read_lock css_set_lock (cgroup iterator start)
- * sighand->siglock (fake signal delivery inside freeze_task())
- *
- * freezer_write() (unfreeze):
- * cgroup_mutex
- * freezer->lock
- * write_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock
- * read_lock css_set_lock (cgroup iterator start)
- * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())
- * sighand->siglock
- */
-static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
+static struct cgroup_subsys_state *freezer_css_alloc(struct cgroup *cgroup)
{
struct freezer *freezer;
@@ -137,160 +101,244 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
return ERR_PTR(-ENOMEM);
spin_lock_init(&freezer->lock);
- freezer->state = CGROUP_THAWED;
return &freezer->css;
}
-static void freezer_destroy(struct cgroup *cgroup)
+/**
+ * freezer_css_online - commit creation of a freezer cgroup
+ * @cgroup: cgroup being created
+ *
+ * We're committing to creation of @cgroup. Mark it online and inherit
+ * parent's freezing state while holding both parent's and our
+ * freezer->lock.
+ */
+static int freezer_css_online(struct cgroup *cgroup)
+{
+ struct freezer *freezer = cgroup_freezer(cgroup);
+ struct freezer *parent = parent_freezer(freezer);
+
+ /*
+ * The following double locking and freezing state inheritance
+ * guarantee that @cgroup can never escape ancestors' freezing
+ * states. See cgroup_for_each_descendant_pre() for details.
+ */
+ if (parent)
+ spin_lock_irq(&parent->lock);
+ spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
+
+ freezer->state |= CGROUP_FREEZER_ONLINE;
+
+ if (parent && (parent->state & CGROUP_FREEZING)) {
+ freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
+ atomic_inc(&system_freezing_cnt);
+ }
+
+ spin_unlock(&freezer->lock);
+ if (parent)
+ spin_unlock_irq(&parent->lock);
+
+ return 0;
+}
+
+/**
+ * freezer_css_offline - initiate destruction of @cgroup
+ * @cgroup: cgroup being destroyed
+ *
+ * @cgroup is going away. Mark it dead and decrement system_freezing_count
+ * if it was holding one.
+ */
+static void freezer_css_offline(struct cgroup *cgroup)
{
struct freezer *freezer = cgroup_freezer(cgroup);
- if (freezer->state != CGROUP_THAWED)
+ spin_lock_irq(&freezer->lock);
+
+ if (freezer->state & CGROUP_FREEZING)
atomic_dec(&system_freezing_cnt);
- kfree(freezer);
+
+ freezer->state = 0;
+
+ spin_unlock_irq(&freezer->lock);
}
-/* task is frozen or will freeze immediately when next it gets woken */
-static bool is_task_frozen_enough(struct task_struct *task)
+static void freezer_css_free(struct cgroup *cgroup)
{
- return frozen(task) ||
- (task_is_stopped_or_traced(task) && freezing(task));
+ kfree(cgroup_freezer(cgroup));
}
/*
- * The call to cgroup_lock() in the freezer.state write method prevents
- * a write to that file racing against an attach, and hence the
- * can_attach() result will remain valid until the attach completes.
+ * Tasks can be migrated into a different freezer anytime regardless of its
+ * current state. freezer_attach() is responsible for making new tasks
+ * conform to the current state.
+ *
+ * Freezer state changes and task migration are synchronized via
+ * @freezer->lock. freezer_attach() makes the new tasks conform to the
+ * current state and all following state changes can see the new tasks.
*/
-static int freezer_can_attach(struct cgroup *new_cgroup,
- struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
{
- struct freezer *freezer;
+ struct freezer *freezer = cgroup_freezer(new_cgrp);
struct task_struct *task;
+ bool clear_frozen = false;
+
+ spin_lock_irq(&freezer->lock);
/*
- * Anything frozen can't move or be moved to/from.
+ * Make the new tasks conform to the current state of @new_cgrp.
+ * For simplicity, when migrating any task to a FROZEN cgroup, we
+ * revert it to FREEZING and let update_if_frozen() determine the
+ * correct state later.
+ *
+ * Tasks in @tset are on @new_cgrp but may not conform to its
+ * current state before executing the following - !frozen tasks may
+ * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
*/
- cgroup_taskset_for_each(task, new_cgroup, tset)
- if (cgroup_freezing(task))
- return -EBUSY;
+ cgroup_taskset_for_each(task, new_cgrp, tset) {
+ if (!(freezer->state & CGROUP_FREEZING)) {
+ __thaw_task(task);
+ } else {
+ freeze_task(task);
+ freezer->state &= ~CGROUP_FROZEN;
+ clear_frozen = true;
+ }
+ }
- freezer = cgroup_freezer(new_cgroup);
- if (freezer->state != CGROUP_THAWED)
- return -EBUSY;
+ spin_unlock_irq(&freezer->lock);
- return 0;
+ /*
+ * Propagate FROZEN clearing upwards. We may race with
+ * update_if_frozen(), but as long as both work bottom-up, either
+ * update_if_frozen() sees child's FROZEN cleared or we clear the
+ * parent's FROZEN later. No parent w/ !FROZEN children can be
+ * left FROZEN.
+ */
+ while (clear_frozen && (freezer = parent_freezer(freezer))) {
+ spin_lock_irq(&freezer->lock);
+ freezer->state &= ~CGROUP_FROZEN;
+ clear_frozen = freezer->state & CGROUP_FREEZING;
+ spin_unlock_irq(&freezer->lock);
+ }
}
static void freezer_fork(struct task_struct *task)
{
struct freezer *freezer;
- /*
- * No lock is needed, since the task isn't on tasklist yet,
- * so it can't be moved to another cgroup, which means the
- * freezer won't be removed and will be valid during this
- * function call. Nevertheless, apply RCU read-side critical
- * section to suppress RCU lockdep false positives.
- */
rcu_read_lock();
freezer = task_freezer(task);
- rcu_read_unlock();
/*
* The root cgroup is non-freezable, so we can skip the
* following check.
*/
if (!freezer->css.cgroup->parent)
- return;
+ goto out;
spin_lock_irq(&freezer->lock);
- BUG_ON(freezer->state == CGROUP_FROZEN);
-
- /* Locking avoids race with FREEZING -> THAWED transitions. */
- if (freezer->state == CGROUP_FREEZING)
+ if (freezer->state & CGROUP_FREEZING)
freeze_task(task);
spin_unlock_irq(&freezer->lock);
+out:
+ rcu_read_unlock();
}
-/*
- * caller must hold freezer->lock
+/**
+ * update_if_frozen - update whether a cgroup finished freezing
+ * @cgroup: cgroup of interest
+ *
+ * Once FREEZING is initiated, transition to FROZEN is lazily updated by
+ * calling this function. If the current state is FREEZING but not FROZEN,
+ * this function checks whether all tasks of this cgroup and the descendant
+ * cgroups finished freezing and, if so, sets FROZEN.
+ *
+ * The caller is responsible for grabbing RCU read lock and calling
+ * update_if_frozen() on all descendants prior to invoking this function.
+ *
+ * Task states and freezer state might disagree while tasks are being
+ * migrated into or out of @cgroup, so we can't verify task states against
+ * @freezer state here. See freezer_attach() for details.
*/
-static void update_if_frozen(struct cgroup *cgroup,
- struct freezer *freezer)
+static void update_if_frozen(struct cgroup *cgroup)
{
+ struct freezer *freezer = cgroup_freezer(cgroup);
+ struct cgroup *pos;
struct cgroup_iter it;
struct task_struct *task;
- unsigned int nfrozen = 0, ntotal = 0;
- enum freezer_state old_state = freezer->state;
- cgroup_iter_start(cgroup, &it);
- while ((task = cgroup_iter_next(cgroup, &it))) {
- ntotal++;
- if (freezing(task) && is_task_frozen_enough(task))
- nfrozen++;
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ spin_lock_irq(&freezer->lock);
+
+ if (!(freezer->state & CGROUP_FREEZING) ||
+ (freezer->state & CGROUP_FROZEN))
+ goto out_unlock;
+
+ /* are all (live) children frozen? */
+ cgroup_for_each_child(pos, cgroup) {
+ struct freezer *child = cgroup_freezer(pos);
+
+ if ((child->state & CGROUP_FREEZER_ONLINE) &&
+ !(child->state & CGROUP_FROZEN))
+ goto out_unlock;
}
- if (old_state == CGROUP_THAWED) {
- BUG_ON(nfrozen > 0);
- } else if (old_state == CGROUP_FREEZING) {
- if (nfrozen == ntotal)
- freezer->state = CGROUP_FROZEN;
- } else { /* old_state == CGROUP_FROZEN */
- BUG_ON(nfrozen != ntotal);
+ /* are all tasks frozen? */
+ cgroup_iter_start(cgroup, &it);
+
+ while ((task = cgroup_iter_next(cgroup, &it))) {
+ if (freezing(task)) {
+ /*
+ * freezer_should_skip() indicates that the task
+ * should be skipped when determining freezing
+ * completion. Consider it frozen in addition to
+ * the usual frozen condition.
+ */
+ if (!frozen(task) && !freezer_should_skip(task))
+ goto out_iter_end;
+ }
}
+ freezer->state |= CGROUP_FROZEN;
+out_iter_end:
cgroup_iter_end(cgroup, &it);
+out_unlock:
+ spin_unlock_irq(&freezer->lock);
}
static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
struct seq_file *m)
{
- struct freezer *freezer;
- enum freezer_state state;
+ struct cgroup *pos;
- if (!cgroup_lock_live_group(cgroup))
- return -ENODEV;
+ rcu_read_lock();
- freezer = cgroup_freezer(cgroup);
- spin_lock_irq(&freezer->lock);
- state = freezer->state;
- if (state == CGROUP_FREEZING) {
- /* We change from FREEZING to FROZEN lazily if the cgroup was
- * only partially frozen when we exitted write. */
- update_if_frozen(cgroup, freezer);
- state = freezer->state;
- }
- spin_unlock_irq(&freezer->lock);
- cgroup_unlock();
+ /* update states bottom-up */
+ cgroup_for_each_descendant_post(pos, cgroup)
+ update_if_frozen(pos);
+ update_if_frozen(cgroup);
+
+ rcu_read_unlock();
- seq_puts(m, freezer_state_strs[state]);
+ seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state));
seq_putc(m, '\n');
return 0;
}
-static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+static void freeze_cgroup(struct freezer *freezer)
{
+ struct cgroup *cgroup = freezer->css.cgroup;
struct cgroup_iter it;
struct task_struct *task;
- unsigned int num_cant_freeze_now = 0;
cgroup_iter_start(cgroup, &it);
- while ((task = cgroup_iter_next(cgroup, &it))) {
- if (!freeze_task(task))
- continue;
- if (is_task_frozen_enough(task))
- continue;
- if (!freezing(task) && !freezer_should_skip(task))
- num_cant_freeze_now++;
- }
+ while ((task = cgroup_iter_next(cgroup, &it)))
+ freeze_task(task);
cgroup_iter_end(cgroup, &it);
-
- return num_cant_freeze_now ? -EBUSY : 0;
}
-static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+static void unfreeze_cgroup(struct freezer *freezer)
{
+ struct cgroup *cgroup = freezer->css.cgroup;
struct cgroup_iter it;
struct task_struct *task;
@@ -300,59 +348,111 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
cgroup_iter_end(cgroup, &it);
}
-static int freezer_change_state(struct cgroup *cgroup,
- enum freezer_state goal_state)
+/**
+ * freezer_apply_state - apply state change to a single cgroup_freezer
+ * @freezer: freezer to apply state change to
+ * @freeze: whether to freeze or unfreeze
+ * @state: CGROUP_FREEZING_* flag to set or clear
+ *
+ * Set or clear @state on @cgroup according to @freeze, and perform
+ * freezing or thawing as necessary.
+ */
+static void freezer_apply_state(struct freezer *freezer, bool freeze,
+ unsigned int state)
{
- struct freezer *freezer;
- int retval = 0;
-
- freezer = cgroup_freezer(cgroup);
+ /* also synchronizes against task migration, see freezer_attach() */
+ lockdep_assert_held(&freezer->lock);
- spin_lock_irq(&freezer->lock);
+ if (!(freezer->state & CGROUP_FREEZER_ONLINE))
+ return;
- update_if_frozen(cgroup, freezer);
-
- switch (goal_state) {
- case CGROUP_THAWED:
- if (freezer->state != CGROUP_THAWED)
- atomic_dec(&system_freezing_cnt);
- freezer->state = CGROUP_THAWED;
- unfreeze_cgroup(cgroup, freezer);
- break;
- case CGROUP_FROZEN:
- if (freezer->state == CGROUP_THAWED)
+ if (freeze) {
+ if (!(freezer->state & CGROUP_FREEZING))
atomic_inc(&system_freezing_cnt);
- freezer->state = CGROUP_FREEZING;
- retval = try_to_freeze_cgroup(cgroup, freezer);
- break;
- default:
- BUG();
+ freezer->state |= state;
+ freeze_cgroup(freezer);
+ } else {
+ bool was_freezing = freezer->state & CGROUP_FREEZING;
+
+ freezer->state &= ~state;
+
+ if (!(freezer->state & CGROUP_FREEZING)) {
+ if (was_freezing)
+ atomic_dec(&system_freezing_cnt);
+ freezer->state &= ~CGROUP_FROZEN;
+ unfreeze_cgroup(freezer);
+ }
}
+}
+/**
+ * freezer_change_state - change the freezing state of a cgroup_freezer
+ * @freezer: freezer of interest
+ * @freeze: whether to freeze or thaw
+ *
+ * Freeze or thaw @freezer according to @freeze. The operations are
+ * recursive - all descendants of @freezer will be affected.
+ */
+static void freezer_change_state(struct freezer *freezer, bool freeze)
+{
+ struct cgroup *pos;
+
+ /* update @freezer */
+ spin_lock_irq(&freezer->lock);
+ freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
spin_unlock_irq(&freezer->lock);
- return retval;
+ /*
+ * Update all its descendants in pre-order traversal. Each
+ * descendant will try to inherit its parent's FREEZING state as
+ * CGROUP_FREEZING_PARENT.
+ */
+ rcu_read_lock();
+ cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) {
+ struct freezer *pos_f = cgroup_freezer(pos);
+ struct freezer *parent = parent_freezer(pos_f);
+
+ /*
+ * Our update to @parent->state is already visible which is
+ * all we need. No need to lock @parent. For more info on
+ * synchronization, see freezer_post_create().
+ */
+ spin_lock_irq(&pos_f->lock);
+ freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING,
+ CGROUP_FREEZING_PARENT);
+ spin_unlock_irq(&pos_f->lock);
+ }
+ rcu_read_unlock();
}
-static int freezer_write(struct cgroup *cgroup,
- struct cftype *cft,
+static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
const char *buffer)
{
- int retval;
- enum freezer_state goal_state;
+ bool freeze;
- if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
- goal_state = CGROUP_THAWED;
- else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
- goal_state = CGROUP_FROZEN;
+ if (strcmp(buffer, freezer_state_strs(0)) == 0)
+ freeze = false;
+ else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0)
+ freeze = true;
else
return -EINVAL;
- if (!cgroup_lock_live_group(cgroup))
- return -ENODEV;
- retval = freezer_change_state(cgroup, goal_state);
- cgroup_unlock();
- return retval;
+ freezer_change_state(cgroup_freezer(cgroup), freeze);
+ return 0;
+}
+
+static u64 freezer_self_freezing_read(struct cgroup *cgroup, struct cftype *cft)
+{
+ struct freezer *freezer = cgroup_freezer(cgroup);
+
+ return (bool)(freezer->state & CGROUP_FREEZING_SELF);
+}
+
+static u64 freezer_parent_freezing_read(struct cgroup *cgroup, struct cftype *cft)
+{
+ struct freezer *freezer = cgroup_freezer(cgroup);
+
+ return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
}
static struct cftype files[] = {
@@ -362,23 +462,27 @@ static struct cftype files[] = {
.read_seq_string = freezer_read,
.write_string = freezer_write,
},
+ {
+ .name = "self_freezing",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = freezer_self_freezing_read,
+ },
+ {
+ .name = "parent_freezing",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = freezer_parent_freezing_read,
+ },
{ } /* terminate */
};
struct cgroup_subsys freezer_subsys = {
.name = "freezer",
- .create = freezer_create,
- .destroy = freezer_destroy,
+ .css_alloc = freezer_css_alloc,
+ .css_online = freezer_css_online,
+ .css_offline = freezer_css_offline,
+ .css_free = freezer_css_free,
.subsys_id = freezer_subsys_id,
- .can_attach = freezer_can_attach,
+ .attach = freezer_attach,
.fork = freezer_fork,
.base_cftypes = files,
-
- /*
- * freezer subsys doesn't handle hierarchy at all. Frozen state
- * should be inherited through the hierarchy - if a parent is
- * frozen, all its children should be frozen. Fix it and remove
- * the following.
- */
- .broken_hierarchy = true,
};