diff options
author | Tejun Heo <tj@kernel.org> | 2012-04-01 14:38:42 -0700 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2012-04-01 14:38:42 -0700 |
commit | edcb0722c654947908388df660791abd41e6617c (patch) | |
tree | 8c44dd4517bd562b58216dd01fb5abc42cb74a95 /block/blk-cgroup.c | |
parent | 2aa4a1523b40a065bc3a31e20097ea7a618ec3de (diff) |
blkcg: introduce blkg_stat and blkg_rwstat
blkcg uses u64_stats_sync to avoid reading wrong u64 statistic values
on 32bit archs and some stat counters have subtypes to distinguish
read/writes and sync/async IOs. The stat code paths are confusing and
involve a lot of going back and forth between blkcg core and specific
policy implementations, and synchronization and subtype handling are
open coded in blkcg core.
This patch introduces struct blkg_stat and blkg_rwstat which, with
accompanying operations, encapsulate stat updating and accessing with
proper synchronization.
blkg_stat is simple u64 counter with 64bit read-access protection.
blkg_rwstat is the one with rw and [a]sync subcounters and takes @rw
flags to distinguish IO subtypes (%REQ_WRITE and %REQ_SYNC) and
replaces stat_sub_type indexed arrays.
All counters in blkio_group_stats and blkio_group_stats_cpu are
replaced with either blkg_stat or blkg_rwstat along with all users.
This does add one u64_stats_sync per counter and increase stats_sync
operations but they're empty/noops on 64bit archs and blkcg doesn't
have too many counters, especially with DEBUG_BLK_CGROUP off.
While the currently resulting code isn't necessarily simpler at the
moment, this will enable further clean up of blkcg stats code.
- BLKIO_STAT_{READ|WRITE|SYNC|ASYNC|TOTAL} renamed to
BLKG_RWSTAT_{READ|WRITE|SYNC|ASYNC|TOTAL}.
- blkg_stat_add() replaces blkio_add_stat() and
blkio_check_and_dec_stat(). Note that BUG_ON() on underflow in the
latter function no longer exists. It's *way* better to have
underflowed stat counters than oopsing.
- blkio_group_stats->dequeue is now a proper u64 stat counter instead
of ulong.
- reset_stats() updated to clear each stat counters individually and
BLKG_STATS_DEBUG_CLEAR_{START|SIZE} are removed.
- Some functions reconstruct rw flags from direction and sync
booleans. This will be removed by future patches.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- | block/blk-cgroup.c | 289 |
1 files changed, 127 insertions, 162 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 55ccbae6c43..09ac462ba89 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -132,46 +132,6 @@ static inline void blkio_update_group_iops(struct blkio_group *blkg, } } -/* - * Add to the appropriate stat variable depending on the request type. - * This should be called with queue_lock held. - */ -static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, - bool sync) -{ - if (direction) - stat[BLKIO_STAT_WRITE] += add; - else - stat[BLKIO_STAT_READ] += add; - if (sync) - stat[BLKIO_STAT_SYNC] += add; - else - stat[BLKIO_STAT_ASYNC] += add; -} - -/* - * Decrements the appropriate stat variable if non-zero depending on the - * request type. Panics on value being zero. - * This should be called with the queue_lock held. - */ -static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) -{ - if (direction) { - BUG_ON(stat[BLKIO_STAT_WRITE] == 0); - stat[BLKIO_STAT_WRITE]--; - } else { - BUG_ON(stat[BLKIO_STAT_READ] == 0); - stat[BLKIO_STAT_READ]--; - } - if (sync) { - BUG_ON(stat[BLKIO_STAT_SYNC] == 0); - stat[BLKIO_STAT_SYNC]--; - } else { - BUG_ON(stat[BLKIO_STAT_ASYNC] == 0); - stat[BLKIO_STAT_ASYNC]--; - } -} - #ifdef CONFIG_DEBUG_BLK_CGROUP /* This should be called with the queue_lock held. */ static void blkio_set_start_group_wait_time(struct blkio_group *blkg, @@ -198,7 +158,8 @@ static void blkio_update_group_wait_time(struct blkio_group_stats *stats) now = sched_clock(); if (time_after64(now, stats->start_group_wait_time)) - stats->group_wait_time += now - stats->start_group_wait_time; + blkg_stat_add(&stats->group_wait_time, + now - stats->start_group_wait_time); blkio_clear_blkg_waiting(stats); } @@ -212,7 +173,8 @@ static void blkio_end_empty_time(struct blkio_group_stats *stats) now = sched_clock(); if (time_after64(now, stats->start_empty_time)) - stats->empty_time += now - stats->start_empty_time; + blkg_stat_add(&stats->empty_time, + now - stats->start_empty_time); blkio_clear_blkg_empty(stats); } @@ -239,11 +201,9 @@ void blkiocg_update_idle_time_stats(struct blkio_group *blkg, if (blkio_blkg_idling(stats)) { unsigned long long now = sched_clock(); - if (time_after64(now, stats->start_idle_time)) { - u64_stats_update_begin(&stats->syncp); - stats->idle_time += now - stats->start_idle_time; - u64_stats_update_end(&stats->syncp); - } + if (time_after64(now, stats->start_idle_time)) + blkg_stat_add(&stats->idle_time, + now - stats->start_idle_time); blkio_clear_blkg_idling(stats); } } @@ -256,13 +216,10 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - stats->avg_queue_size_sum += - stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] + - stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]; - stats->avg_queue_size_samples++; + blkg_stat_add(&stats->avg_queue_size_sum, + blkg_rwstat_sum(&stats->queued)); + blkg_stat_add(&stats->avg_queue_size_samples, 1); blkio_update_group_wait_time(stats); - u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); @@ -273,8 +230,7 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || - stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) + if (blkg_rwstat_sum(&stats->queued)) return; /* @@ -298,7 +254,7 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - pd->stats.dequeue += dequeue; + blkg_stat_add(&pd->stats.dequeue, dequeue); } EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats); #else @@ -314,14 +270,12 @@ void blkiocg_update_io_add_stats(struct blkio_group *blkg, bool sync) { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync); + blkg_rwstat_add(&stats->queued, rw, 1); blkio_end_empty_time(stats); - u64_stats_update_end(&stats->syncp); - blkio_set_start_group_wait_time(blkg, pol, curr_blkg); } EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats); @@ -331,13 +285,11 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, bool direction, bool sync) { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - blkio_check_and_dec_stat(stats->stat_arr[BLKIO_STAT_QUEUED], direction, - sync); - u64_stats_update_end(&stats->syncp); + blkg_rwstat_add(&stats->queued, rw, -1); } EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); @@ -350,12 +302,10 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - stats->time += time; + blkg_stat_add(&stats->time, time); #ifdef CONFIG_DEBUG_BLK_CGROUP - stats->unaccounted_time += unaccounted_time; + blkg_stat_add(&stats->unaccounted_time, unaccounted_time); #endif - u64_stats_update_end(&stats->syncp); } EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); @@ -367,6 +317,7 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, struct blkio_policy_type *pol, uint64_t bytes, bool direction, bool sync) { + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); struct blkg_policy_data *pd = blkg->pd[pol->plid]; struct blkio_group_stats_cpu *stats_cpu; unsigned long flags; @@ -384,13 +335,10 @@ void blkiocg_update_dispatch_stats(struct blkio_group *blkg, stats_cpu = this_cpu_ptr(pd->stats_cpu); - u64_stats_update_begin(&stats_cpu->syncp); - stats_cpu->sectors += bytes >> 9; - blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED], - 1, direction, sync); - blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES], - bytes, direction, sync); - u64_stats_update_end(&stats_cpu->syncp); + blkg_stat_add(&stats_cpu->sectors, bytes >> 9); + blkg_rwstat_add(&stats_cpu->serviced, rw, 1); + blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); + local_irq_restore(flags); } EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats); @@ -403,17 +351,15 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg, { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; unsigned long long now = sched_clock(); + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); if (time_after64(now, io_start_time)) - blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME], - now - io_start_time, direction, sync); + blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); if (time_after64(io_start_time, start_time)) - blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME], - io_start_time - start_time, direction, sync); - u64_stats_update_end(&stats->syncp); + blkg_rwstat_add(&stats->wait_time, rw, + io_start_time - start_time); } EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats); @@ -423,12 +369,11 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, bool sync) { struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats; + int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0); lockdep_assert_held(blkg->q->queue_lock); - u64_stats_update_begin(&stats->syncp); - blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync); - u64_stats_update_end(&stats->syncp); + blkg_rwstat_add(&stats->merged, rw, 1); } EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); @@ -757,8 +702,9 @@ static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid) struct blkio_group_stats_cpu *sc = per_cpu_ptr(pd->stats_cpu, cpu); - sc->sectors = 0; - memset(sc->stat_arr_cpu, 0, sizeof(sc->stat_arr_cpu)); + blkg_rwstat_reset(&sc->service_bytes); + blkg_rwstat_reset(&sc->serviced); + blkg_stat_reset(&sc->sectors); } } @@ -768,7 +714,6 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); struct blkio_group *blkg; struct hlist_node *n; - int i; spin_lock(&blkio_list_lock); spin_lock_irq(&blkcg->lock); @@ -786,14 +731,18 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) struct blkio_group_stats *stats = &pd->stats; /* queued stats shouldn't be cleared */ - for (i = 0; i < ARRAY_SIZE(stats->stat_arr); i++) - if (i != BLKIO_STAT_QUEUED) - memset(stats->stat_arr[i], 0, - sizeof(stats->stat_arr[i])); - stats->time = 0; + blkg_rwstat_reset(&stats->merged); + blkg_rwstat_reset(&stats->service_time); + blkg_rwstat_reset(&stats->wait_time); + blkg_stat_reset(&stats->time); #ifdef CONFIG_DEBUG_BLK_CGROUP - memset((void *)stats + BLKG_STATS_DEBUG_CLEAR_START, 0, - BLKG_STATS_DEBUG_CLEAR_SIZE); + blkg_stat_reset(&stats->unaccounted_time); + blkg_stat_reset(&stats->avg_queue_size_sum); + blkg_stat_reset(&stats->avg_queue_size_samples); + blkg_stat_reset(&stats->dequeue); + blkg_stat_reset(&stats->group_wait_time); + blkg_stat_reset(&stats->idle_time); + blkg_stat_reset(&stats->empty_time); #endif blkio_reset_stats_cpu(blkg, pol->plid); } @@ -804,7 +753,7 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) return 0; } -static void blkio_get_key_name(enum stat_sub_type type, const char *dname, +static void blkio_get_key_name(enum blkg_rwstat_type type, const char *dname, char *str, int chars_left, bool diskname_only) { snprintf(str, chars_left, "%s", dname); @@ -817,19 +766,19 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname, if (diskname_only) return; switch (type) { - case BLKIO_STAT_READ: + case BLKG_RWSTAT_READ: strlcat(str, " Read", chars_left); break; - case BLKIO_STAT_WRITE: + case BLKG_RWSTAT_WRITE: strlcat(str, " Write", chars_left); break; - case BLKIO_STAT_SYNC: + case BLKG_RWSTAT_SYNC: strlcat(str, " Sync", chars_left); break; - case BLKIO_STAT_ASYNC: + case BLKG_RWSTAT_ASYNC: strlcat(str, " Async", chars_left); break; - case BLKIO_STAT_TOTAL: + case BLKG_RWSTAT_TOTAL: strlcat(str, " Total", chars_left); break; default: @@ -838,29 +787,34 @@ static void blkio_get_key_name(enum stat_sub_type type, const char *dname, } static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg, int plid, - enum stat_type_cpu type, enum stat_sub_type sub_type) + enum stat_type_cpu type, + enum blkg_rwstat_type sub_type) { struct blkg_policy_data *pd = blkg->pd[plid]; + u64 val = 0; int cpu; - struct blkio_group_stats_cpu *stats_cpu; - u64 val = 0, tval; if (pd->stats_cpu == NULL) return val; for_each_possible_cpu(cpu) { - unsigned int start; - stats_cpu = per_cpu_ptr(pd->stats_cpu, cpu); - - do { - start = u64_stats_fetch_begin(&stats_cpu->syncp); - if (type == BLKIO_STAT_CPU_SECTORS) - tval = stats_cpu->sectors; - else - tval = stats_cpu->stat_arr_cpu[type][sub_type]; - } while(u64_stats_fetch_retry(&stats_cpu->syncp, start)); - - val += tval; + struct blkio_group_stats_cpu *stats_cpu = + per_cpu_ptr(pd->stats_cpu, cpu); + struct blkg_rwstat rws; + + switch (type) { + case BLKIO_STAT_CPU_SECTORS: + val += blkg_stat_read(&stats_cpu->sectors); + break; + case BLKIO_STAT_CPU_SERVICE_BYTES: + rws = blkg_rwstat_read(&stats_cpu->service_bytes); + val += rws.cnt[sub_type]; + break; + case BLKIO_STAT_CPU_SERVICED: + rws = blkg_rwstat_read(&stats_cpu->serviced); + val += rws.cnt[sub_type]; + break; + } } return val; @@ -872,7 +826,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, { uint64_t disk_total, val; char key_str[MAX_KEY_LEN]; - enum stat_sub_type sub_type; + enum blkg_rwstat_type sub_type; if (type == BLKIO_STAT_CPU_SECTORS) { val = blkio_read_stat_cpu(blkg, plid, type, 0); @@ -881,7 +835,7 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, return val; } - for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; + for (sub_type = BLKG_RWSTAT_READ; sub_type < BLKG_RWSTAT_NR; sub_type++) { blkio_get_key_name(sub_type, dname, key_str, MAX_KEY_LEN, false); @@ -889,10 +843,10 @@ static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg, int plid, cb->fill(cb, key_str, val); } - disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_READ) + - blkio_read_stat_cpu(blkg, plid, type, BLKIO_STAT_WRITE); + disk_total = blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_READ) + + blkio_read_stat_cpu(blkg, plid, type, BLKG_RWSTAT_WRITE); - blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, + blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN, false); cb->fill(cb, key_str, disk_total); return disk_total; @@ -905,65 +859,76 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, int plid, struct blkio_group_stats *stats = &blkg->pd[plid]->stats; uint64_t v = 0, disk_total = 0; char key_str[MAX_KEY_LEN]; - unsigned int sync_start; + struct blkg_rwstat rws = { }; int st; if (type >= BLKIO_STAT_ARR_NR) { - do { - sync_start = u64_stats_fetch_begin(&stats->syncp); - switch (type) { - case BLKIO_STAT_TIME: - v = stats->time; - break; + switch (type) { + case BLKIO_STAT_TIME: + v = blkg_stat_read(&stats->time); + break; #ifdef CONFIG_DEBUG_BLK_CGROUP - case BLKIO_STAT_UNACCOUNTED_TIME: - v = stats->unaccounted_time; - break; - case BLKIO_STAT_AVG_QUEUE_SIZE: { - uint64_t samples = stats->avg_queue_size_samples; + case BLKIO_STAT_UNACCOUNTED_TIME: + v = blkg_stat_read(&stats->unaccounted_time); + break; + case BLKIO_STAT_AVG_QUEUE_SIZE: { + uint64_t samples; - if (samples) { - v = stats->avg_queue_size_sum; - do_div(v, samples); - } - break; + samples = blkg_stat_read(&stats->avg_queue_size_samples); + if (samples) { + v = blkg_stat_read(&stats->avg_queue_size_sum); + do_div(v, samples); } - case BLKIO_STAT_IDLE_TIME: - v = stats->idle_time; - break; - case BLKIO_STAT_EMPTY_TIME: - v = stats->empty_time; - break; - case BLKIO_STAT_DEQUEUE: - v = stats->dequeue; - break; - case BLKIO_STAT_GROUP_WAIT_TIME: - v = stats->group_wait_time; - break; + break; + } + case BLKIO_STAT_IDLE_TIME: + v = blkg_stat_read(&stats->idle_time); + break; + case BLKIO_STAT_EMPTY_TIME: + v = blkg_stat_read(&stats->empty_time); + break; + case BLKIO_STAT_DEQUEUE: + v = blkg_stat_read(&stats->dequeue); + break; + case BLKIO_STAT_GROUP_WAIT_TIME: + v = blkg_stat_read(&stats->group_wait_time); + break; #endif - default: - WARN_ON_ONCE(1); - } - } while (u64_stats_fetch_retry(&stats->syncp, sync_start)); + default: + WARN_ON_ONCE(1); + } blkio_get_key_name(0, dname, key_str, MAX_KEY_LEN, true); cb->fill(cb, key_str, v); return v; } - for (st = BLKIO_STAT_READ; st < BLKIO_STAT_TOTAL; st++) { - do { - sync_start = u64_stats_fetch_begin(&stats->syncp); - v = stats->stat_arr[type][st]; - } while (u64_stats_fetch_retry(&stats->syncp, sync_start)); + switch (type) { + case BLKIO_STAT_MERGED: + rws = blkg_rwstat_read(&stats->merged); + break; + case BLKIO_STAT_SERVICE_TIME: + rws = blkg_rwstat_read(&stats->service_time); + break; + case BLKIO_STAT_WAIT_TIME: + rws = blkg_rwstat_read(&stats->wait_time); + break; + case BLKIO_STAT_QUEUED: + rws = blkg_rwstat_read(&stats->queued); + break; + default: + WARN_ON_ONCE(true); + break; + } + for (st = BLKG_RWSTAT_READ; st < BLKG_RWSTAT_NR; st++) { blkio_get_key_name(st, dname, key_str, MAX_KEY_LEN, false); - cb->fill(cb, key_str, v); - if (st == BLKIO_STAT_READ || st == BLKIO_STAT_WRITE) - disk_total += v; + cb->fill(cb, key_str, rws.cnt[st]); + if (st == BLKG_RWSTAT_READ || st == BLKG_RWSTAT_WRITE) + disk_total += rws.cnt[st]; } - blkio_get_key_name(BLKIO_STAT_TOTAL, dname, key_str, MAX_KEY_LEN, + blkio_get_key_name(BLKG_RWSTAT_TOTAL, dname, key_str, MAX_KEY_LEN, false); cb->fill(cb, key_str, disk_total); return disk_total; |