From cafe563591446cf80bfbc2fe3bc72a2e36cf1060 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Mar 2013 16:11:31 -0700 Subject: bcache: A block layer cache Does writethrough and writeback caching, handles unclean shutdown, and has a bunch of other nifty features motivated by real world usage. See the wiki at http://bcache.evilpiepirate.org for more. Signed-off-by: Kent Overstreet --- drivers/md/bcache/request.c | 1409 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1409 insertions(+) create mode 100644 drivers/md/bcache/request.c (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c new file mode 100644 index 00000000000..4f552de49aa --- /dev/null +++ b/drivers/md/bcache/request.c @@ -0,0 +1,1409 @@ +/* + * Main bcache entry point - handle a read or a write request and decide what to + * do with it; the make_request functions are called by the block layer. + * + * Copyright 2010, 2011 Kent Overstreet + * Copyright 2012 Google, Inc. + */ + +#include "bcache.h" +#include "btree.h" +#include "debug.h" +#include "request.h" + +#include +#include +#include +#include +#include "blk-cgroup.h" + +#include + +#define CUTOFF_CACHE_ADD 95 +#define CUTOFF_CACHE_READA 90 +#define CUTOFF_WRITEBACK 50 +#define CUTOFF_WRITEBACK_SYNC 75 + +struct kmem_cache *bch_search_cache; + +static void check_should_skip(struct cached_dev *, struct search *); + +/* Cgroup interface */ + +#ifdef CONFIG_CGROUP_BCACHE +static struct bch_cgroup bcache_default_cgroup = { .cache_mode = -1 }; + +static struct bch_cgroup *cgroup_to_bcache(struct cgroup *cgroup) +{ + struct cgroup_subsys_state *css; + return cgroup && + (css = cgroup_subsys_state(cgroup, bcache_subsys_id)) + ? container_of(css, struct bch_cgroup, css) + : &bcache_default_cgroup; +} + +struct bch_cgroup *bch_bio_to_cgroup(struct bio *bio) +{ + struct cgroup_subsys_state *css = bio->bi_css + ? 
cgroup_subsys_state(bio->bi_css->cgroup, bcache_subsys_id) + : task_subsys_state(current, bcache_subsys_id); + + return css + ? container_of(css, struct bch_cgroup, css) + : &bcache_default_cgroup; +} + +static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, loff_t *ppos) +{ + char tmp[1024]; + int len = snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, + cgroup_to_bcache(cgrp)->cache_mode + 1); + + if (len < 0) + return len; + + return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); +} + +static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + int v = read_string_list(buf, bch_cache_modes); + if (v < 0) + return v; + + cgroup_to_bcache(cgrp)->cache_mode = v - 1; + return 0; +} + +static u64 bch_verify_read(struct cgroup *cgrp, struct cftype *cft) +{ + return cgroup_to_bcache(cgrp)->verify; +} + +static int bch_verify_write(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + cgroup_to_bcache(cgrp)->verify = val; + return 0; +} + +static u64 bch_cache_hits_read(struct cgroup *cgrp, struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_hits); +} + +static u64 bch_cache_misses_read(struct cgroup *cgrp, struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_misses); +} + +static u64 bch_cache_bypass_hits_read(struct cgroup *cgrp, + struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_bypass_hits); +} + +static u64 bch_cache_bypass_misses_read(struct cgroup *cgrp, + struct cftype *cft) +{ + struct bch_cgroup *bcachecg = cgroup_to_bcache(cgrp); + return atomic_read(&bcachecg->stats.cache_bypass_misses); +} + +static struct cftype bch_files[] = { + { + .name = "cache_mode", + .read = cache_mode_read, + .write_string = cache_mode_write, + }, + { + 
.name = "verify", + .read_u64 = bch_verify_read, + .write_u64 = bch_verify_write, + }, + { + .name = "cache_hits", + .read_u64 = bch_cache_hits_read, + }, + { + .name = "cache_misses", + .read_u64 = bch_cache_misses_read, + }, + { + .name = "cache_bypass_hits", + .read_u64 = bch_cache_bypass_hits_read, + }, + { + .name = "cache_bypass_misses", + .read_u64 = bch_cache_bypass_misses_read, + }, + { } /* terminate */ +}; + +static void init_bch_cgroup(struct bch_cgroup *cg) +{ + cg->cache_mode = -1; +} + +static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup) +{ + struct bch_cgroup *cg; + + cg = kzalloc(sizeof(*cg), GFP_KERNEL); + if (!cg) + return ERR_PTR(-ENOMEM); + init_bch_cgroup(cg); + return &cg->css; +} + +static void bcachecg_destroy(struct cgroup *cgroup) +{ + struct bch_cgroup *cg = cgroup_to_bcache(cgroup); + free_css_id(&bcache_subsys, &cg->css); + kfree(cg); +} + +struct cgroup_subsys bcache_subsys = { + .create = bcachecg_create, + .destroy = bcachecg_destroy, + .subsys_id = bcache_subsys_id, + .name = "bcache", + .module = THIS_MODULE, +}; +EXPORT_SYMBOL_GPL(bcache_subsys); +#endif + +static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) +{ +#ifdef CONFIG_CGROUP_BCACHE + int r = bch_bio_to_cgroup(bio)->cache_mode; + if (r >= 0) + return r; +#endif + return BDEV_CACHE_MODE(&dc->sb); +} + +static bool verify(struct cached_dev *dc, struct bio *bio) +{ +#ifdef CONFIG_CGROUP_BCACHE + if (bch_bio_to_cgroup(bio)->verify) + return true; +#endif + return dc->verify; +} + +static void bio_csum(struct bio *bio, struct bkey *k) +{ + struct bio_vec *bv; + uint64_t csum = 0; + int i; + + bio_for_each_segment(bv, bio, i) { + void *d = kmap(bv->bv_page) + bv->bv_offset; + csum = crc64_update(csum, d, bv->bv_len); + kunmap(bv->bv_page); + } + + k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1); +} + +/* Insert data into cache */ + +static void bio_invalidate(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + 
struct bio *bio = op->cache_bio; + + pr_debug("invalidating %i sectors from %llu", + bio_sectors(bio), (uint64_t) bio->bi_sector); + + while (bio_sectors(bio)) { + unsigned len = min(bio_sectors(bio), 1U << 14); /* at most 1 << 14 sectors per key */ + + if (bch_keylist_realloc(&op->keys, 0, op->c)) + goto out; + + bio->bi_sector += len; + bio->bi_size -= len << 9; + /* bi_sector was advanced above, so the key is built from the end sector of this chunk */ + bch_keylist_add(&op->keys, + &KEY(op->inode, bio->bi_sector, len)); + } + + op->insert_data_done = true; + bio_put(bio); +out: + continue_at(cl, bch_journal, bcache_wq); +} +/* One bucket kept open for writes; linked on cache_set->data_buckets */ +struct open_bucket { + struct list_head list; + struct task_struct *last; + unsigned sectors_free; + BKEY_PADDED(key); +}; +/* Unlink and kfree() every open_bucket on c->data_buckets */ +void bch_open_buckets_free(struct cache_set *c) +{ + struct open_bucket *b; + + while (!list_empty(&c->data_buckets)) { + b = list_first_entry(&c->data_buckets, + struct open_bucket, list); + list_del(&b->list); + kfree(b); + } +} +/* Initialise c->data_bucket_lock and add 6 zeroed open_buckets to c->data_buckets; returns -ENOMEM on allocation failure (buckets already added stay on the list), 0 otherwise */ +int bch_open_buckets_alloc(struct cache_set *c) +{ + int i; + + spin_lock_init(&c->data_bucket_lock); + + for (i = 0; i < 6; i++) { + struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL); + if (!b) + return -ENOMEM; + + list_add(&b->list, &c->data_buckets); + } + + return 0; +} + +/* + * We keep multiple buckets open for writes, and try to segregate different + * write streams for better cache utilization: first we look for a bucket where + * the last write to it was sequential with the current write, and failing that + * we look for a bucket that was last used by the same task. + * + * The idea is if you've got multiple tasks pulling data into the cache at the + * same time, you'll get better cache utilization if you try to segregate their + * data and preserve locality. + * + * For example, say you're starting Firefox at the same time you're copying a + * bunch of files. Firefox will likely end up being fairly hot and stay in the + * cache awhile, but the data you copied might not be; if you wrote all that + * data to the same buckets it'd get invalidated at the same time. 
+ * + * Both of those tasks will be doing fairly random IO so we can't rely on + * detecting sequential IO to segregate their data, but going off of the task + * should be a sane heuristic. + */ +static struct open_bucket *pick_data_bucket(struct cache_set *c, + const struct bkey *search, + struct task_struct *task, + struct bkey *alloc) +{ + struct open_bucket *ret, *ret_task = NULL; + /* walk the list from the tail: a bucket whose key compares equal to @search wins outright, otherwise remember one last used by @task */ + list_for_each_entry_reverse(ret, &c->data_buckets, list) + if (!bkey_cmp(&ret->key, search)) + goto found; + else if (ret->last == task) + ret_task = ret; + /* no key match: fall back to the task's bucket, else the first entry of the list */ + ret = ret_task ?: list_first_entry(&c->data_buckets, + struct open_bucket, list); +found: + if (!ret->sectors_free && KEY_PTRS(alloc)) { /* bucket is used up and @alloc holds a newly allocated key: take it over and reset @alloc */ + ret->sectors_free = c->sb.bucket_size; + bkey_copy(&ret->key, alloc); + bkey_init(alloc); + } + /* still nothing free: return NULL so the caller can allocate and retry */ + if (!ret->sectors_free) + ret = NULL; + + return ret; +} + +/* + * Allocates some space in the cache to write to, sets k to point to the newly + * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the + * end of the newly allocated space). + * + * May allocate fewer sectors than @sectors, KEY_SIZE(k) indicates how many + * sectors were actually allocated. + * + * If s->writeback is true, will not fail. + */ +static bool bch_alloc_sectors(struct bkey *k, unsigned sectors, + struct search *s) +{ + struct cache_set *c = s->op.c; + struct open_bucket *b; + BKEY_PADDED(key) alloc; + struct closure cl, *w = NULL; + unsigned i; + + if (s->writeback) { + closure_init_stack(&cl); + w = &cl; + } + + /* + * We might have to allocate a new bucket, which we can't do with a + * spinlock held. So if we have to allocate, we drop the lock, allocate + * and then retry. KEY_PTRS() indicates whether alloc points to + * allocated bucket(s). + */ + + bkey_init(&alloc.key); + spin_lock(&c->data_bucket_lock); + + while (!(b = pick_data_bucket(c, k, s->task, &alloc.key))) { + unsigned watermark = s->op.write_prio + ? 
WATERMARK_MOVINGGC + : WATERMARK_NONE; + + spin_unlock(&c->data_bucket_lock); + + if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, w)) + return false; + + spin_lock(&c->data_bucket_lock); + } + + /* + * If we had to allocate, we might race and not need to allocate the + * second time we call pick_data_bucket(). If we allocated a bucket but + * didn't use it, drop the refcount bch_bucket_alloc_set() took: + */ + if (KEY_PTRS(&alloc.key)) + __bkey_put(c, &alloc.key); + + for (i = 0; i < KEY_PTRS(&b->key); i++) + EBUG_ON(ptr_stale(c, &b->key, i)); + + /* Set up the pointer to the space we're allocating: */ + + for (i = 0; i < KEY_PTRS(&b->key); i++) + k->ptr[i] = b->key.ptr[i]; + /* never hand out more than the open bucket has left */ + sectors = min(sectors, b->sectors_free); + + SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors); + SET_KEY_SIZE(k, sectors); + SET_KEY_PTRS(k, KEY_PTRS(&b->key)); + + /* + * Move b to the end of the lru, and keep track of what this bucket was + * last used for: + */ + list_move_tail(&b->list, &c->data_buckets); + bkey_copy_key(&b->key, k); + b->last = s->task; + + b->sectors_free -= sectors; + /* advance each of the open bucket's pointers past the space just handed out and account the sectors against the owning cache */ + for (i = 0; i < KEY_PTRS(&b->key); i++) { + SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors); + + atomic_long_add(sectors, + &PTR_CACHE(c, &b->key, i)->sectors_written); + } + /* less than one block left: treat the bucket as full */ + if (b->sectors_free < c->sb.block_size) + b->sectors_free = 0; + + /* + * k takes refcounts on the buckets it points to until it's inserted + * into the btree, but if we're done with this bucket we just transfer + * the open bucket's refcount. + */ + if (b->sectors_free) + for (i = 0; i < KEY_PTRS(&b->key); i++) + atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin); + + spin_unlock(&c->data_bucket_lock); + return true; +} + +static void bch_insert_data_error(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + + /* + * Our data write just errored, which means we've got a bunch of keys to + * insert that point to data that wasn't successfully written. 
+ * + * We don't have to insert those keys but we still have to invalidate + * that region of the cache - so, if we just strip off all the pointers + * from the keys we'll accomplish just that. + */ + + struct bkey *src = op->keys.bottom, *dst = op->keys.bottom; + + while (src != op->keys.top) { + struct bkey *n = bkey_next(src); + + SET_KEY_PTRS(src, 0); + bkey_copy(dst, src); + + dst = bkey_next(dst); + src = n; + } + + op->keys.top = dst; + + bch_journal(cl); +} + +static void bch_insert_data_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + + if (error) { + /* TODO: We could try to recover from this. */ + if (s->writeback) + s->error = error; + else if (s->write) + set_closure_fn(cl, bch_insert_data_error, bcache_wq); + else + set_closure_fn(cl, NULL, NULL); + } + + bch_bbio_endio(op->c, bio, error, "writing data to cache"); +} + +static void bch_insert_data_loop(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + struct bio *bio = op->cache_bio, *n; + + if (op->skip) + return bio_invalidate(cl); + + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { + set_gc_sectors(op->c); + bch_queue_gc(op->c); + } + + do { + unsigned i; + struct bkey *k; + struct bio_set *split = s->d + ? s->d->bio_split : op->c->bio_split; + + /* 1 for the device pointer and 1 for the chksum */ + if (bch_keylist_realloc(&op->keys, + 1 + (op->csum ? 
1 : 0), + op->c)) + continue_at(cl, bch_journal, bcache_wq); + + k = op->keys.top; + bkey_init(k); + SET_KEY_INODE(k, op->inode); + SET_KEY_OFFSET(k, bio->bi_sector); + + if (!bch_alloc_sectors(k, bio_sectors(bio), s)) + goto err; + + n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); + if (!n) { + __bkey_put(op->c, k); + continue_at(cl, bch_insert_data_loop, bcache_wq); + } + + n->bi_end_io = bch_insert_data_endio; + n->bi_private = cl; + + if (s->writeback) { + SET_KEY_DIRTY(k, true); + + for (i = 0; i < KEY_PTRS(k); i++) + SET_GC_MARK(PTR_BUCKET(op->c, k, i), + GC_MARK_DIRTY); + } + + SET_KEY_CSUM(k, op->csum); + if (KEY_CSUM(k)) + bio_csum(n, k); + + pr_debug("%s", pkey(k)); + bch_keylist_push(&op->keys); + + trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev); + n->bi_rw |= REQ_WRITE; + bch_submit_bbio(n, op->c, k, 0); + } while (n != bio); + + op->insert_data_done = true; + continue_at(cl, bch_journal, bcache_wq); +err: + /* bch_alloc_sectors() blocks if s->writeback = true */ + BUG_ON(s->writeback); + + /* + * But if it's not a writeback write we'd rather just bail out if + * there aren't any buckets ready to write to - it might take awhile and + * we might be starving btree writes for gc or something. + */ + + if (s->write) { + /* + * Writethrough write: We can't complete the write until we've + * updated the index. But we don't want to delay the write while + * we wait for buckets to be freed up, so just invalidate the + * rest of the write. + */ + op->skip = true; + return bio_invalidate(cl); + } else { + /* + * From a cache miss, we can just insert the keys for the data + * we have written or bail out if we didn't do anything. 
+ */ + op->insert_data_done = true; + bio_put(bio); + + if (!bch_keylist_empty(&op->keys)) + continue_at(cl, bch_journal, bcache_wq); + else + closure_return(cl); + } +} + +/** + * bch_insert_data - stick some data in the cache + * + * This is the starting point for any data to end up in a cache device; it could + * be from a normal write, or a writeback write, or a write to a flash only + * volume - it's also used by the moving garbage collector to compact data in + * mostly empty buckets. + * + * It first writes the data to the cache, creating a list of keys to be inserted + * (if the data had to be fragmented there will be multiple keys); after the + * data is written it calls bch_journal, and after the keys have been added to + * the next journal write they're inserted into the btree. + * + * It inserts the data in op->cache_bio; bi_sector is used for the key offset, + * and op->inode is used for the key inode. + * + * If op->skip is true, instead of inserting the data it invalidates the region + * of the cache represented by op->cache_bio and op->inode. 
+ */ +void bch_insert_data(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + + bch_keylist_init(&op->keys); + bio_get(op->cache_bio); + bch_insert_data_loop(cl); +} + +void bch_btree_insert_async(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + struct search *s = container_of(op, struct search, op); + + if (bch_btree_insert(op, op->c)) { + s->error = -ENOMEM; + op->insert_data_done = true; + } + + if (op->insert_data_done) { + bch_keylist_free(&op->keys); + closure_return(cl); + } else + continue_at(cl, bch_insert_data_loop, bcache_wq); +} + +/* Common code for the make_request functions */ + +static void request_endio(struct bio *bio, int error) +{ + struct closure *cl = bio->bi_private; + + if (error) { + struct search *s = container_of(cl, struct search, cl); + s->error = error; + /* Only cache read errors are recoverable */ + s->recoverable = false; + } + + bio_put(bio); + closure_put(cl); +} + +void bch_cache_read_endio(struct bio *bio, int error) +{ + struct bbio *b = container_of(bio, struct bbio, bio); + struct closure *cl = bio->bi_private; + struct search *s = container_of(cl, struct search, cl); + + /* + * If the bucket was reused while our bio was in flight, we might have + * read the wrong data. Set s->error but not error so it doesn't get + * counted against the cache device, but we'll still reread the data + * from the backing device. 
+ */ + + if (error) + s->error = error; + else if (ptr_stale(s->op.c, &b->key, 0)) { + atomic_long_inc(&s->op.c->cache_read_races); + s->error = -EINTR; + } + + bch_bbio_endio(s->op.c, bio, error, "reading from cache"); +} + +static void bio_complete(struct search *s) +{ + if (s->orig_bio) { + int cpu, rw = bio_data_dir(s->orig_bio); + unsigned long duration = jiffies - s->start_time; + + cpu = part_stat_lock(); + part_round_stats(cpu, &s->d->disk->part0); + part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration); + part_stat_unlock(); + + trace_bcache_request_end(s, s->orig_bio); + bio_endio(s->orig_bio, s->error); + s->orig_bio = NULL; + } +} + +static void do_bio_hook(struct search *s) +{ + struct bio *bio = &s->bio.bio; + memcpy(bio, s->orig_bio, sizeof(struct bio)); + + bio->bi_end_io = request_endio; + bio->bi_private = &s->cl; + atomic_set(&bio->bi_cnt, 3); +} + +static void search_free(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + bio_complete(s); + + if (s->op.cache_bio) + bio_put(s->op.cache_bio); + + if (s->unaligned_bvec) + mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec); + + closure_debug_destroy(cl); + mempool_free(s, s->d->c->search); +} + +static struct search *search_alloc(struct bio *bio, struct bcache_device *d) +{ + struct bio_vec *bv; + struct search *s = mempool_alloc(d->c->search, GFP_NOIO); + memset(s, 0, offsetof(struct search, op.keys)); + + __closure_init(&s->cl, NULL); + + s->op.inode = d->id; + s->op.c = d->c; + s->d = d; + s->op.lock = -1; + s->task = current; + s->orig_bio = bio; + s->write = (bio->bi_rw & REQ_WRITE) != 0; + s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; + s->recoverable = 1; + s->start_time = jiffies; + do_bio_hook(s); + + if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) { + bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO); + memcpy(bv, bio_iovec(bio), + sizeof(struct bio_vec) * bio_segments(bio)); + + 
s->bio.bio.bi_io_vec = bv; + s->unaligned_bvec = 1; + } + + return s; +} + +static void btree_read_async(struct closure *cl) +{ + struct btree_op *op = container_of(cl, struct btree_op, cl); + + int ret = btree_root(search_recurse, op->c, op); + + if (ret == -EAGAIN) + continue_at(cl, btree_read_async, bcache_wq); + + closure_return(cl); +} + +/* Cached devices */ + +static void cached_dev_bio_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + search_free(cl); + cached_dev_put(dc); +} + +/* Process reads */ + +static void cached_dev_read_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + + if (s->op.insert_collision) + bch_mark_cache_miss_collision(s); + + if (s->op.cache_bio) { + int i; + struct bio_vec *bv; + + __bio_for_each_segment(bv, s->op.cache_bio, i, 0) + __free_page(bv->bv_page); + } + + cached_dev_bio_complete(cl); +} + +static void request_read_error(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct bio_vec *bv; + int i; + + if (s->recoverable) { + /* The cache read failed, but we can retry from the backing + * device. 
+ */ + pr_debug("recovering at sector %llu", + (uint64_t) s->orig_bio->bi_sector); + + s->error = 0; + bv = s->bio.bio.bi_io_vec; /* do_bio_hook() overwrites the whole bio from orig_bio, so keep our own bi_io_vec pointer and restore it afterwards */ + do_bio_hook(s); + s->bio.bio.bi_io_vec = bv; + + if (!s->unaligned_bvec) + bio_for_each_segment(bv, s->orig_bio, i) + bv->bv_offset = 0, bv->bv_len = PAGE_SIZE; + else + memcpy(s->bio.bio.bi_io_vec, + bio_iovec(s->orig_bio), + sizeof(struct bio_vec) * + bio_segments(s->orig_bio)); + + /* XXX: invalidate cache */ + + trace_bcache_read_retry(&s->bio.bio); + closure_bio_submit(&s->bio.bio, &s->cl, s->d); + } + + continue_at(cl, cached_dev_read_complete, NULL); +} + +static void request_read_done(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + /* + * s->op.cache_bio != NULL implies that we had a cache miss; cache_bio now + * contains data ready to be inserted into the cache. + * + * First, we copy the data we just read from cache_bio's bounce buffers + * to the buffers the original bio pointed to: + */ + + if (s->op.cache_bio) { + struct bio_vec *src, *dst; + unsigned src_offset, dst_offset, bytes; + void *dst_ptr; + + bio_reset(s->op.cache_bio); + s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; + s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; + s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; + bio_map(s->op.cache_bio, NULL); + + src = bio_iovec(s->op.cache_bio); + dst = bio_iovec(s->cache_miss); + src_offset = src->bv_offset; + dst_offset = dst->bv_offset; + dst_ptr = kmap(dst->bv_page); + /* copy segment by segment until every destination segment of cache_miss has been filled */ + while (1) { + if (dst_offset == dst->bv_offset + dst->bv_len) { /* current destination segment is full: unmap it and advance, stopping after the last one */ + kunmap(dst->bv_page); + dst++; + if (dst == bio_iovec_idx(s->cache_miss, + s->cache_miss->bi_vcnt)) + break; + + dst_offset = dst->bv_offset; + dst_ptr = kmap(dst->bv_page); + } + + if (src_offset == src->bv_offset + src->bv_len) { /* current source segment consumed; running out of source before the destination is filled hits BUG() */ + src++; + if (src == bio_iovec_idx(s->op.cache_bio, + s->op.cache_bio->bi_vcnt)) + BUG(); + + src_offset = src->bv_offset; + } + + bytes 
= min(dst->bv_offset + dst->bv_len - dst_offset, + src->bv_offset + src->bv_len - src_offset); + + memcpy(dst_ptr + dst_offset, + page_address(src->bv_page) + src_offset, + bytes); + + src_offset += bytes; + dst_offset += bytes; + } + + bio_put(s->cache_miss); + s->cache_miss = NULL; + } + + if (verify(dc, &s->bio.bio) && s->recoverable) + bch_data_verify(s); + + bio_complete(s); + + if (s->op.cache_bio && + !test_bit(CACHE_SET_STOPPING, &s->op.c->flags)) { + s->op.type = BTREE_REPLACE; + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + } + + continue_at(cl, cached_dev_read_complete, NULL); +} + +static void request_read_done_bh(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip); + + if (s->error) + continue_at_nobarrier(cl, request_read_error, bcache_wq); + else if (s->op.cache_bio || verify(dc, &s->bio.bio)) + continue_at_nobarrier(cl, request_read_done, bcache_wq); + else + continue_at_nobarrier(cl, cached_dev_read_complete, NULL); +} + +static int cached_dev_cache_miss(struct btree *b, struct search *s, + struct bio *bio, unsigned sectors) +{ + int ret = 0; + unsigned reada; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + struct bio *miss; + + miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); + if (!miss) + return -EAGAIN; + + if (miss == bio) + s->op.lookup_done = true; + + miss->bi_end_io = request_endio; + miss->bi_private = &s->cl; + + if (s->cache_miss || s->op.skip) + goto out_submit; + + if (miss != bio || + (bio->bi_rw & REQ_RAHEAD) || + (bio->bi_rw & REQ_META) || + s->op.c->gc_stats.in_use >= CUTOFF_CACHE_READA) + reada = 0; + else { + reada = min(dc->readahead >> 9, + sectors - bio_sectors(miss)); + + if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev)) + reada = bdev_sectors(miss->bi_bdev) - bio_end(miss); + } + + s->cache_bio_sectors = 
bio_sectors(miss) + reada; + s->op.cache_bio = bio_alloc_bioset(GFP_NOWAIT, + DIV_ROUND_UP(s->cache_bio_sectors, PAGE_SECTORS), + dc->disk.bio_split); + + if (!s->op.cache_bio) + goto out_submit; + + s->op.cache_bio->bi_sector = miss->bi_sector; + s->op.cache_bio->bi_bdev = miss->bi_bdev; + s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; + + s->op.cache_bio->bi_end_io = request_endio; + s->op.cache_bio->bi_private = &s->cl; + + /* btree_search_recurse()'s btree iterator is no good anymore */ + ret = -EINTR; + if (!bch_btree_insert_check_key(b, &s->op, s->op.cache_bio)) + goto out_put; + + bio_map(s->op.cache_bio, NULL); + if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + goto out_put; + + s->cache_miss = miss; + bio_get(s->op.cache_bio); + + trace_bcache_cache_miss(s->orig_bio); + closure_bio_submit(s->op.cache_bio, &s->cl, s->d); + + return ret; +out_put: + bio_put(s->op.cache_bio); + s->op.cache_bio = NULL; +out_submit: + closure_bio_submit(miss, &s->cl, s->d); + return ret; +} + +static void request_read(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + + check_should_skip(dc, s); + closure_call(&s->op.cl, btree_read_async, NULL, cl); + + continue_at(cl, request_read_done_bh, NULL); +} + +/* Process writes */ + +static void cached_dev_write_complete(struct closure *cl) +{ + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); + + up_read_non_owner(&dc->writeback_lock); + cached_dev_bio_complete(cl); +} + +static bool should_writeback(struct cached_dev *dc, struct bio *bio) +{ + unsigned threshold = (bio->bi_rw & REQ_SYNC) + ? 
CUTOFF_WRITEBACK_SYNC + : CUTOFF_WRITEBACK; + + return !atomic_read(&dc->disk.detaching) && + cache_mode(dc, bio) == CACHE_MODE_WRITEBACK && + dc->disk.c->gc_stats.in_use < threshold; +} + +static void request_write(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + struct bio *bio = &s->bio.bio; + struct bkey start, end; + start = KEY(dc->disk.id, bio->bi_sector, 0); + end = KEY(dc->disk.id, bio_end(bio), 0); + + bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end); + + check_should_skip(dc, s); + down_read_non_owner(&dc->writeback_lock); + + if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) { + s->op.skip = false; + s->writeback = true; + } + + if (bio->bi_rw & REQ_DISCARD) + goto skip; + + if (s->op.skip) + goto skip; + + if (should_writeback(dc, s->orig_bio)) + s->writeback = true; + + if (!s->writeback) { + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + trace_bcache_writethrough(s->orig_bio); + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + trace_bcache_writeback(s->orig_bio); + bch_writeback_add(dc, bio_sectors(bio)); + } +out: + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + continue_at(cl, cached_dev_write_complete, NULL); +skip: + s->op.skip = true; + s->op.cache_bio = s->orig_bio; + bio_get(s->op.cache_bio); + trace_bcache_write_skip(s->orig_bio); + + if ((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + goto out; + + closure_bio_submit(bio, cl, s->d); + goto out; +} + +static void request_nodata(struct cached_dev *dc, struct search *s) +{ + struct closure *cl = &s->cl; + struct bio *bio = &s->bio.bio; + + if (bio->bi_rw & REQ_DISCARD) { + request_write(dc, s); + return; + } + + if (s->op.flush_journal) + bch_journal_meta(s->op.c, cl); + + closure_bio_submit(bio, cl, s->d); + + continue_at(cl, cached_dev_bio_complete, NULL); +} + +/* Cached devices - read & write stuff */ + +int bch_get_congested(struct 
cache_set *c) +{ + int i; + + if (!c->congested_read_threshold_us && + !c->congested_write_threshold_us) + return 0; + + i = (local_clock_us() - c->congested_last_us) / 1024; + if (i < 0) + return 0; + + i += atomic_read(&c->congested); + if (i >= 0) + return 0; + + i += CONGESTED_MAX; + + return i <= 0 ? 1 : fract_exp_two(i, 6); +} + +static void add_sequential(struct task_struct *t) +{ + ewma_add(t->sequential_io_avg, + t->sequential_io, 8, 0); + + t->sequential_io = 0; +} + +static void check_should_skip(struct cached_dev *dc, struct search *s) +{ + struct hlist_head *iohash(uint64_t k) + { return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; } + + struct cache_set *c = s->op.c; + struct bio *bio = &s->bio.bio; + + long rand; + int cutoff = bch_get_congested(c); + unsigned mode = cache_mode(dc, bio); + + if (atomic_read(&dc->disk.detaching) || + c->gc_stats.in_use > CUTOFF_CACHE_ADD || + (bio->bi_rw & REQ_DISCARD)) + goto skip; + + if (mode == CACHE_MODE_NONE || + (mode == CACHE_MODE_WRITEAROUND && + (bio->bi_rw & REQ_WRITE))) + goto skip; + + if (bio->bi_sector & (c->sb.block_size - 1) || + bio_sectors(bio) & (c->sb.block_size - 1)) { + pr_debug("skipping unaligned io"); + goto skip; + } + + if (!cutoff) { + cutoff = dc->sequential_cutoff >> 9; + + if (!cutoff) + goto rescale; + + if (mode == CACHE_MODE_WRITEBACK && + (bio->bi_rw & REQ_WRITE) && + (bio->bi_rw & REQ_SYNC)) + goto rescale; + } + + if (dc->sequential_merge) { + struct io *i; + + spin_lock(&dc->io_lock); + + hlist_for_each_entry(i, iohash(bio->bi_sector), hash) + if (i->last == bio->bi_sector && + time_before(jiffies, i->jiffies)) + goto found; + + i = list_first_entry(&dc->io_lru, struct io, lru); + + add_sequential(s->task); + i->sequential = 0; +found: + if (i->sequential + bio->bi_size > i->sequential) + i->sequential += bio->bi_size; + + i->last = bio_end(bio); + i->jiffies = jiffies + msecs_to_jiffies(5000); + s->task->sequential_io = i->sequential; + + hlist_del(&i->hash); + 
hlist_add_head(&i->hash, iohash(i->last)); + list_move_tail(&i->lru, &dc->io_lru); + + spin_unlock(&dc->io_lock); + } else { + s->task->sequential_io = bio->bi_size; + + add_sequential(s->task); + } + + rand = get_random_int(); + cutoff -= bitmap_weight(&rand, BITS_PER_LONG); + + if (cutoff <= (int) (max(s->task->sequential_io, + s->task->sequential_io_avg) >> 9)) + goto skip; + +rescale: + bch_rescale_priorities(c, bio_sectors(bio)); + return; +skip: + bch_mark_sectors_bypassed(s, bio_sectors(bio)); + s->op.skip = true; +} + +static void cached_dev_make_request(struct request_queue *q, struct bio *bio) +{ + struct search *s; + struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + int cpu, rw = bio_data_dir(bio); + + cpu = part_stat_lock(); + part_stat_inc(cpu, &d->disk->part0, ios[rw]); + part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + + bio->bi_bdev = dc->bdev; + bio->bi_sector += BDEV_DATA_START; + + if (cached_dev_get(dc)) { + s = search_alloc(bio, d); + trace_bcache_request_start(s, bio); + + if (!bio_has_data(bio)) + request_nodata(dc, s); + else if (rw) + request_write(dc, s); + else + request_read(dc, s); + } else { + if ((bio->bi_rw & REQ_DISCARD) && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + bio_endio(bio, 0); + else + bch_generic_make_request(bio, &d->bio_split_hook); + } +} + +static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); +} + +static int cached_dev_congested(void *data, int bits) +{ + struct bcache_device *d = data; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + struct request_queue *q = bdev_get_queue(dc->bdev); + int ret = 0; + + if (bdi_congested(&q->backing_dev_info, bits)) + return 1; + + if (cached_dev_get(dc)) 
{ + unsigned i; + struct cache *ca; + + for_each_cache(ca, d->c, i) { + q = bdev_get_queue(ca->bdev); + ret |= bdi_congested(&q->backing_dev_info, bits); + } + + cached_dev_put(dc); + } + + return ret; +} + /* bch_cached_dev_request_init - wire up the cached-device entry points: make_request_fn and congested_fn on the queue of dc->disk.disk, plus the cache_miss and ioctl hooks on dc->disk. */ +void bch_cached_dev_request_init(struct cached_dev *dc) +{ + struct gendisk *g = dc->disk.disk; + + g->queue->make_request_fn = cached_dev_make_request; + g->queue->backing_dev_info.congested_fn = cached_dev_congested; + dc->disk.cache_miss = cached_dev_cache_miss; + dc->disk.ioctl = cached_dev_ioctl; +} + +/* Flash backed devices */ + /* flash_dev_cache_miss - cache_miss hook for flash devices (installed by bch_flash_dev_request_init). Zero-fills up to sectors sectors of bio, one bio_vec at a time, shrinking bv_len and advancing bv_offset as it goes. If a bio_vec still has bytes left after its share was zeroed, the function returns early; once every bio_vec has been consumed it sets s->op.lookup_done. Always returns 0. b is unused. NOTE(review): presumably zeroes are returned because a flash-only device has no other source for the data - confirm. */ +static int flash_dev_cache_miss(struct btree *b, struct search *s, + struct bio *bio, unsigned sectors) +{ + /* Zero fill bio */ + + while (bio->bi_idx != bio->bi_vcnt) { + struct bio_vec *bv = bio_iovec(bio); + unsigned j = min(bv->bv_len >> 9, sectors); + + void *p = kmap(bv->bv_page); + memset(p + bv->bv_offset, 0, j << 9); + kunmap(bv->bv_page); + + bv->bv_len -= j << 9; + bv->bv_offset += j << 9; + /* bio_vec not fully consumed: stop here without advancing bi_sector, bi_size or bi_idx */ + if (bv->bv_len) + return 0; + + bio->bi_sector += j; + bio->bi_size -= j << 9; + + bio->bi_idx++; + sectors -= j; + } + + s->op.lookup_done = true; + + return 0; +} + /* flash_dev_make_request - make_request_fn for a flash device (installed by bch_flash_dev_request_init). Accounts the bio against part0 of the disk, allocates a search and from then on works on the bio embedded in that search. A read with data is started through btree_read_async. A write with data, or any bio for which s->op.skip is set, first has its sector range checked against the moving_gc_keys keybuf and is then inserted through bch_insert_data with s->writeback set. A bio with neither only triggers bch_journal_meta when s->op.flush_journal is set. In all cases the search closure continues at search_free. q is unused. */ +static void flash_dev_make_request(struct request_queue *q, struct bio *bio) +{ + struct search *s; + struct closure *cl; + struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + int cpu, rw = bio_data_dir(bio); + + cpu = part_stat_lock(); + part_stat_inc(cpu, &d->disk->part0, ios[rw]); + part_stat_add(cpu, &d->disk->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); + + s = search_alloc(bio, d); + cl = &s->cl; + bio = &s->bio.bio; + + trace_bcache_request_start(s, bio); + + if (bio_has_data(bio) && !rw) { + closure_call(&s->op.cl, btree_read_async, NULL, cl); + } else if (bio_has_data(bio) || s->op.skip) { + bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, + &KEY(d->id, bio->bi_sector, 0), + &KEY(d->id, bio_end(bio), 0)); + + s->writeback = true; + s->op.cache_bio = bio; + + closure_call(&s->op.cl, bch_insert_data, NULL, cl); + } else { 
+ /* No data - probably a cache flush; the journal is only written when flush_journal is set */ + if (s->op.flush_journal) + bch_journal_meta(s->op.c, cl); + } + + continue_at(cl, search_free, NULL); +} + /* flash_dev_ioctl - ioctl hook for flash devices: no command is handled, every call returns -ENOTTY. */ +static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + /* flash_dev_congested - congested_fn callback; data is the bcache_device. Returns the OR of bdi_congested() for bits over the queue of every cache in d->c (0 if none is congested). */ +static int flash_dev_congested(void *data, int bits) +{ + struct bcache_device *d = data; + struct request_queue *q; + struct cache *ca; + unsigned i; + int ret = 0; + + for_each_cache(ca, d->c, i) { + q = bdev_get_queue(ca->bdev); + ret |= bdi_congested(&q->backing_dev_info, bits); + } + + return ret; +} + /* bch_flash_dev_request_init - wire up the flash-device entry points: make_request_fn and congested_fn on the queue of d->disk, plus the cache_miss and ioctl hooks on d. */ +void bch_flash_dev_request_init(struct bcache_device *d) +{ + struct gendisk *g = d->disk; + + g->queue->make_request_fn = flash_dev_make_request; + g->queue->backing_dev_info.congested_fn = flash_dev_congested; + d->cache_miss = flash_dev_cache_miss; + d->ioctl = flash_dev_ioctl; +} + /* bch_request_exit - teardown counterpart of bch_request_init: unloads the bcache cgroup subsystem when CONFIG_CGROUP_BCACHE is set and destroys bch_search_cache if it was created. */ +void bch_request_exit(void) +{ +#ifdef CONFIG_CGROUP_BCACHE + cgroup_unload_subsys(&bcache_subsys); +#endif + if (bch_search_cache) + kmem_cache_destroy(bch_search_cache); +} + /* bch_request_init - creates the slab cache for struct search and returns -ENOMEM if that fails; with CONFIG_CGROUP_BCACHE it also loads the bcache cgroup subsystem, initialises the default cgroup and registers bch_files. Returns 0 otherwise. NOTE(review): the results of cgroup_load_subsys() and cgroup_add_cftypes() are not checked here. */ +int __init bch_request_init(void) +{ + bch_search_cache = KMEM_CACHE(search, 0); + if (!bch_search_cache) + return -ENOMEM; + +#ifdef CONFIG_CGROUP_BCACHE + cgroup_load_subsys(&bcache_subsys); + init_bch_cgroup(&bcache_default_cgroup); + + cgroup_add_cftypes(&bcache_subsys, bch_files); +#endif + return 0; +} -- cgit v1.2.3-70-g09d2 From b1a67b0f4c747ca10c96ebb24f04e2a74b3c298d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 25 Mar 2013 11:46:44 -0700 Subject: bcache: Style/checkpatch fixes Took out some nested functions, and fixed some more checkpatch complaints. 
Signed-off-by: Kent Overstreet Cc: linux-bcache@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 22 ++++++---------------- drivers/md/bcache/bcache.h | 10 +++++----- drivers/md/bcache/bset.c | 9 +++++---- drivers/md/bcache/btree.c | 4 ++-- drivers/md/bcache/debug.c | 8 ++++---- drivers/md/bcache/journal.c | 8 ++++---- drivers/md/bcache/movinggc.c | 24 ++++++++++++------------ drivers/md/bcache/request.c | 12 +++++++----- drivers/md/bcache/stats.c | 3 ++- drivers/md/bcache/super.c | 7 ++++--- 10 files changed, 51 insertions(+), 56 deletions(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index ed18115e078..2879487d036 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -229,24 +229,14 @@ static void invalidate_one_bucket(struct cache *ca, struct bucket *b) fifo_push(&ca->free_inc, b - ca->buckets); } -static void invalidate_buckets_lru(struct cache *ca) -{ - unsigned bucket_prio(struct bucket *b) - { - return ((unsigned) (b->prio - ca->set->min_prio)) * - GC_SECTORS_USED(b); - } - - bool bucket_max_cmp(struct bucket *l, struct bucket *r) - { - return bucket_prio(l) < bucket_prio(r); - } +#define bucket_prio(b) \ + (((unsigned) (b->prio - ca->set->min_prio)) * GC_SECTORS_USED(b)) - bool bucket_min_cmp(struct bucket *l, struct bucket *r) - { - return bucket_prio(l) > bucket_prio(r); - } +#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) +#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) +static void invalidate_buckets_lru(struct cache *ca) +{ struct bucket *b; ssize_t i; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d01a553f63f..b2846e70149 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -644,8 +644,8 @@ struct gc_stat { * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). 
* - * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down the - * allocation thread. + * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down + * the allocation thread. */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 @@ -1012,11 +1012,11 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c, * searches - it told you where a key started. It's not used anymore, * and can probably be safely dropped. */ -#define KEY(dev, sector, len) (struct bkey) \ -{ \ +#define KEY(dev, sector, len) \ +((struct bkey) { \ .high = (1ULL << 63) | ((uint64_t) (len) << 20) | (dev), \ .low = (sector) \ -} +}) static inline void bkey_init(struct bkey *k) { diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 79fe1f0190f..4dc9cb4efac 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -161,9 +161,9 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k) #ifdef CONFIG_BCACHE_EDEBUG bug: mutex_unlock(&b->c->bucket_lock); - btree_bug(b, "inconsistent pointer %s: bucket %zu pin %i " - "prio %i gen %i last_gc %i mark %llu gc_gen %i", pkey(k), - PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), + btree_bug(b, +"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i", + pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin), g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen); return true; #endif @@ -1049,7 +1049,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start) for (i = start; i <= b->nsets; i++) keys += b->sets[i].data->keys; - order = roundup_pow_of_two(__set_bytes(b->sets->data, keys)) / PAGE_SIZE; + order = roundup_pow_of_two(__set_bytes(b->sets->data, + keys)) / PAGE_SIZE; if (order) order = ilog2(order); } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index af9ea4a9633..24b67805909 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1021,8 +1021,8 @@ retry: goto err_free; if (!b) { - 
cache_bug(c, "Tried to allocate bucket" - " that was in btree cache"); + cache_bug(c, + "Tried to allocate bucket that was in btree cache"); __bkey_put(c, &k.key); goto retry; } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 4b37ef2b80e..141a5cac11a 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -217,8 +217,8 @@ void bch_data_verify(struct search *s) if (memcmp(p1 + bv->bv_offset, p2 + bv->bv_offset, bv->bv_len)) - printk(KERN_ERR "bcache (%s): verify failed" - " at sector %llu\n", + printk(KERN_ERR + "bcache (%s): verify failed at sector %llu\n", bdevname(dc->bdev, name), (uint64_t) s->orig_bio->bi_sector); @@ -525,8 +525,8 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a, k = bkey_next(k), l = bkey_next(l)) if (bkey_cmp(k, l) || KEY_SIZE(k) != KEY_SIZE(l)) - pr_err("key %zi differs: %s " - "!= %s", (uint64_t *) k - i->d, + pr_err("key %zi differs: %s != %s", + (uint64_t *) k - i->d, pkey(k), pkey(l)); for (j = 0; j < 3; j++) { diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index c871ffaabbb..21fd1010cf5 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -293,9 +293,9 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list, BUG_ON(i->pin && atomic_read(i->pin) != 1); if (n != i->j.seq) - pr_err("journal entries %llu-%llu " - "missing! (replaying %llu-%llu)\n", - n, i->j.seq - 1, start, end); + pr_err( + "journal entries %llu-%llu missing! 
(replaying %llu-%llu)\n", + n, i->j.seq - 1, start, end); for (k = i->j.start; k < end(&i->j); @@ -439,7 +439,7 @@ static void do_journal_discard(struct cache *ca) bio_init(bio); bio->bi_sector = bucket_to_sector(ca->set, - ca->sb.d[ja->discard_idx]); + ca->sb.d[ja->discard_idx]); bio->bi_bdev = ca->bdev; bio->bi_rw = REQ_WRITE|REQ_DISCARD; bio->bi_max_vecs = 1; diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index c69fc92b02c..e3ec0a550b0 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -183,6 +183,16 @@ err: if (!IS_ERR_OR_NULL(w->private)) closure_return(cl); } +static bool bucket_cmp(struct bucket *l, struct bucket *r) +{ + return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); +} + +static unsigned bucket_heap_top(struct cache *ca) +{ + return GC_SECTORS_USED(heap_peek(&ca->heap)); +} + void bch_moving_gc(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, gc.cl); @@ -190,16 +200,6 @@ void bch_moving_gc(struct closure *cl) struct bucket *b; unsigned i; - bool bucket_cmp(struct bucket *l, struct bucket *r) - { - return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); - } - - unsigned top(struct cache *ca) - { - return GC_SECTORS_USED(heap_peek(&ca->heap)); - } - if (!c->copy_gc_enabled) closure_return(cl); @@ -220,7 +220,7 @@ void bch_moving_gc(struct closure *cl) sectors_to_move += GC_SECTORS_USED(b); heap_add(&ca->heap, b, bucket_cmp); } else if (bucket_cmp(b, heap_peek(&ca->heap))) { - sectors_to_move -= top(ca); + sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; @@ -233,7 +233,7 @@ void bch_moving_gc(struct closure *cl) sectors_to_move -= GC_SECTORS_USED(b); } - ca->gc_move_threshold = top(ca); + ca->gc_move_threshold = bucket_heap_top(ca); pr_debug("threshold %u", ca->gc_move_threshold); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4f552de49aa..dbda9646ef3 100644 --- a/drivers/md/bcache/request.c +++ 
b/drivers/md/bcache/request.c @@ -1117,11 +1117,13 @@ static void add_sequential(struct task_struct *t) t->sequential_io = 0; } -static void check_should_skip(struct cached_dev *dc, struct search *s) +static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) { - struct hlist_head *iohash(uint64_t k) - { return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; } + return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; +} +static void check_should_skip(struct cached_dev *dc, struct search *s) +{ struct cache_set *c = s->op.c; struct bio *bio = &s->bio.bio; @@ -1162,7 +1164,7 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) spin_lock(&dc->io_lock); - hlist_for_each_entry(i, iohash(bio->bi_sector), hash) + hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash) if (i->last == bio->bi_sector && time_before(jiffies, i->jiffies)) goto found; @@ -1180,7 +1182,7 @@ found: s->task->sequential_io = i->sequential; hlist_del(&i->hash); - hlist_add_head(&i->hash, iohash(i->last)); + hlist_add_head(&i->hash, iohash(dc, i->last)); list_move_tail(&i->lru, &dc->io_lru); spin_unlock(&dc->io_lock); diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index bf6cf9518c8..64e679449c2 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -95,7 +95,8 @@ static KTYPE(bch_stats); static void scale_accounting(unsigned long data); -void bch_cache_accounting_init(struct cache_accounting *acc, struct closure *parent) +void bch_cache_accounting_init(struct cache_accounting *acc, + struct closure *parent) { kobject_init(&acc->total.kobj, &bch_stats_ktype); kobject_init(&acc->five_minute.kobj, &bch_stats_ktype); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7b8efc77087..484ae6c8f43 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -526,7 +526,8 @@ void bch_prio_write(struct cache *ca) for (i = prio_buckets(ca) - 1; i >= 0; --i) { long bucket; struct prio_set *p = ca->disk_buckets; - struct 
bucket_disk *d = p->data, *end = d + prios_per_bucket(ca); + struct bucket_disk *d = p->data; + struct bucket_disk *end = d + prios_per_bucket(ca); for (b = ca->buckets + i * prios_per_bucket(ca); b < ca->buckets + ca->sb.nbuckets && d < end; @@ -865,8 +866,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) if (dc->sb.block_size < c->sb.block_size) { /* Will die */ - pr_err("Couldn't attach %s: block size " - "less than set's block size", buf); + pr_err("Couldn't attach %s: block size less than set's block size", + buf); return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 169ef1cf6171d35550fef85645b83b960e241cff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Mar 2013 12:50:55 -0600 Subject: bcache: Don't export utility code, prefix with bch_ Signed-off-by: Kent Overstreet Cc: linux-bcache@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/bset.c | 4 ++-- drivers/md/bcache/btree.c | 18 +++++++-------- drivers/md/bcache/debug.c | 2 +- drivers/md/bcache/journal.c | 4 ++-- drivers/md/bcache/movinggc.c | 4 ++-- drivers/md/bcache/request.c | 14 +++++------ drivers/md/bcache/super.c | 18 +++++++-------- drivers/md/bcache/sysfs.c | 24 +++++++++---------- drivers/md/bcache/sysfs.h | 2 +- drivers/md/bcache/util.c | 38 +++++++++++------------------- drivers/md/bcache/util.h | 54 +++++++++++++++++++++---------------------- drivers/md/bcache/writeback.c | 6 ++--- 13 files changed, 89 insertions(+), 101 deletions(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index b2846e70149..f05723565f1 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -1033,7 +1033,7 @@ static inline void bkey_init(struct bkey *k) * jset: The checksum is _always_ the first 8 bytes of these structs */ #define csum_set(i) \ - crc64(((void *) (i)) + sizeof(uint64_t), \ + bch_crc64(((void *) (i)) + sizeof(uint64_t), \ ((void *) end(i)) - 
(((void *) (i)) + sizeof(uint64_t))) /* Error handling macros */ diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 4dc9cb4efac..0b33aac1f14 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1026,7 +1026,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, if (!start) { spin_lock(&b->c->sort_time_lock); - time_stats_update(&b->c->sort_time, start_time); + bch_time_stats_update(&b->c->sort_time, start_time); spin_unlock(&b->c->sort_time_lock); } } @@ -1076,7 +1076,7 @@ void bch_btree_sort_into(struct btree *b, struct btree *new) btree_mergesort(b, new->sets->data, &iter, false, true); spin_lock(&b->c->sort_time_lock); - time_stats_update(&b->c->sort_time, start_time); + bch_time_stats_update(&b->c->sort_time, start_time); spin_unlock(&b->c->sort_time_lock); bkey_copy_key(&new->key, &b->key); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 24b67805909..f2b2c653c5a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -129,7 +129,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) uint64_t crc = b->key.ptr[0]; void *data = (void *) i + 8, *end = end(i); - crc = crc64_update(crc, data, end - data); + crc = bch_crc64_update(crc, data, end - data); return crc ^ 0xffffffffffffffff; } @@ -231,7 +231,7 @@ out: mutex_unlock(&b->c->fill_lock); spin_lock(&b->c->btree_read_time_lock); - time_stats_update(&b->c->btree_read_time, b->io_start_time); + bch_time_stats_update(&b->c->btree_read_time, b->io_start_time); spin_unlock(&b->c->btree_read_time_lock); smp_wmb(); /* read_done is our write lock */ @@ -259,7 +259,7 @@ void bch_btree_read(struct btree *b) b->bio->bi_rw = REQ_META|READ_SYNC; b->bio->bi_size = KEY_SIZE(&b->key) << 9; - bio_map(b->bio, b->sets[0].data); + bch_bio_map(b->bio, b->sets[0].data); pr_debug("%s", pbtree(b)); trace_bcache_btree_read(b->bio); @@ -327,12 +327,12 @@ static void do_btree_write(struct btree *b) btree_bio_init(b); 
b->bio->bi_rw = REQ_META|WRITE_SYNC; b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); - bio_map(b->bio, i); + bch_bio_map(b->bio, i); bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); - if (!bio_alloc_pages(b->bio, GFP_NOIO)) { + if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); @@ -347,7 +347,7 @@ static void do_btree_write(struct btree *b) continue_at(cl, btree_write_done, NULL); } else { b->bio->bi_vcnt = 0; - bio_map(b->bio, i); + bch_bio_map(b->bio, i); trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); @@ -815,7 +815,7 @@ retry: void bch_cannibalize_unlock(struct cache_set *c, struct closure *cl) { if (c->try_harder == cl) { - time_stats_update(&c->try_harder_time, c->try_harder_start); + bch_time_stats_update(&c->try_harder_time, c->try_harder_start); c->try_harder = NULL; __closure_wake_up(&c->try_wait); } @@ -1536,7 +1536,7 @@ static void bch_btree_gc(struct closure *cl) available = bch_btree_gc_finish(c); - time_stats_update(&c->btree_gc_time, start_time); + bch_time_stats_update(&c->btree_gc_time, start_time); stats.key_bytes *= sizeof(uint64_t); stats.dirty <<= 9; @@ -2007,7 +2007,7 @@ static int btree_split(struct btree *b, struct btree_op *op) rw_unlock(true, n1); btree_node_free(b, op); - time_stats_update(&b->c->btree_split_time, start_time); + bch_time_stats_update(&b->c->btree_split_time, start_time); return 0; err_free2: diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 141a5cac11a..732234d9ec0 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -200,7 +200,7 @@ void bch_data_verify(struct search *s) if (!check) return; - if (bio_alloc_pages(check, GFP_NOIO)) + if (bch_bio_alloc_pages(check, GFP_NOIO)) goto out_put; check->bi_rw = READ_SYNC; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 
21fd1010cf5..b0a3d0577d1 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -54,7 +54,7 @@ reread: left = ca->sb.bucket_size - offset; bio->bi_end_io = journal_read_endio; bio->bi_private = &op->cl; - bio_map(bio, data); + bch_bio_map(bio, data); closure_bio_submit(bio, &op->cl, ca); closure_sync(&op->cl); @@ -621,7 +621,7 @@ static void journal_write_unlocked(struct closure *cl) bio->bi_end_io = journal_write_endio; bio->bi_private = w; - bio_map(bio, w->data); + bch_bio_map(bio, w->data); trace_bcache_journal_write(bio); bio_list_add(&list, bio); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index e3ec0a550b0..8589512c972 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -85,7 +85,7 @@ static void moving_init(struct moving_io *io) PAGE_SECTORS); bio->bi_private = &io->s.cl; bio->bi_io_vec = bio->bi_inline_vecs; - bio_map(bio, NULL); + bch_bio_map(bio, NULL); } static void write_moving(struct closure *cl) @@ -159,7 +159,7 @@ static void read_moving(struct closure *cl) bio->bi_rw = READ; bio->bi_end_io = read_moving_endio; - if (bio_alloc_pages(bio, GFP_KERNEL)) + if (bch_bio_alloc_pages(bio, GFP_KERNEL)) goto err; pr_debug("%s", pkey(&w->key)); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index dbda9646ef3..83731dc36f3 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -58,8 +58,8 @@ static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, char __user *buf, size_t nbytes, loff_t *ppos) { char tmp[1024]; - int len = snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, - cgroup_to_bcache(cgrp)->cache_mode + 1); + int len = bch_snprint_string_list(tmp, PAGE_SIZE, bch_cache_modes, + cgroup_to_bcache(cgrp)->cache_mode + 1); if (len < 0) return len; @@ -70,7 +70,7 @@ static ssize_t cache_mode_read(struct cgroup *cgrp, struct cftype *cft, static int cache_mode_write(struct cgroup *cgrp, struct cftype *cft, const char 
*buf) { - int v = read_string_list(buf, bch_cache_modes); + int v = bch_read_string_list(buf, bch_cache_modes); if (v < 0) return v; @@ -205,7 +205,7 @@ static void bio_csum(struct bio *bio, struct bkey *k) bio_for_each_segment(bv, bio, i) { void *d = kmap(bv->bv_page) + bv->bv_offset; - csum = crc64_update(csum, d, bv->bv_len); + csum = bch_crc64_update(csum, d, bv->bv_len); kunmap(bv->bv_page); } @@ -835,7 +835,7 @@ static void request_read_done(struct closure *cl) s->op.cache_bio->bi_sector = s->cache_miss->bi_sector; s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev; s->op.cache_bio->bi_size = s->cache_bio_sectors << 9; - bio_map(s->op.cache_bio, NULL); + bch_bio_map(s->op.cache_bio, NULL); src = bio_iovec(s->op.cache_bio); dst = bio_iovec(s->cache_miss); @@ -962,8 +962,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, if (!bch_btree_insert_check_key(b, &s->op, s->op.cache_bio)) goto out_put; - bio_map(s->op.cache_bio, NULL); - if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) + bch_bio_map(s->op.cache_bio, NULL); + if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO)) goto out_put; s->cache_miss = miss; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 484ae6c8f43..f47ecb5cb31 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -142,7 +142,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto err; err = "Bad UUID"; - if (is_zero(sb->uuid, 16)) + if (bch_is_zero(sb->uuid, 16)) goto err; err = "Unsupported superblock version"; @@ -170,7 +170,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, goto out; err = "Bad UUID"; - if (is_zero(sb->set_uuid, 16)) + if (bch_is_zero(sb->set_uuid, 16)) goto err; err = "Bad cache device number in set"; @@ -218,7 +218,7 @@ static void __write_super(struct cache_sb *sb, struct bio *bio) bio->bi_sector = SB_SECTOR; bio->bi_rw = REQ_SYNC|REQ_META; bio->bi_size = SB_SIZE; - 
bio_map(bio, NULL); + bch_bio_map(bio, NULL); out->offset = cpu_to_le64(sb->offset); out->version = cpu_to_le64(sb->version); @@ -332,7 +332,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw, bio->bi_end_io = uuid_endio; bio->bi_private = cl; - bio_map(bio, c->uuids); + bch_bio_map(bio, c->uuids); bch_submit_bbio(bio, c, k, i); @@ -344,7 +344,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw, pkey(&c->uuid_bucket)); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) - if (!is_zero(u->uuid, 16)) + if (!bch_is_zero(u->uuid, 16)) pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u", u - c->uuids, u->uuid, u->label, u->first_reg, u->last_reg, u->invalidated); @@ -491,7 +491,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) bio->bi_end_io = prio_endio; bio->bi_private = ca; - bio_map(bio, ca->disk_buckets); + bch_bio_map(bio, ca->disk_buckets); closure_bio_submit(bio, &ca->prio, ca); closure_sync(cl); @@ -538,7 +538,7 @@ void bch_prio_write(struct cache *ca) p->next_bucket = ca->prio_buckets[i + 1]; p->magic = pset_magic(ca); - p->csum = crc64(&p->magic, bucket_bytes(ca) - 8); + p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8); bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, &cl); BUG_ON(bucket == -1); @@ -585,7 +585,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) prio_io(ca, bucket, READ_SYNC); - if (p->csum != crc64(&p->magic, bucket_bytes(ca) - 8)) + if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); if (p->magic != pset_magic(ca)) @@ -898,7 +898,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) sysfs_remove_file(&dc->kobj, &sysfs_attach); */ - if (is_zero(u->uuid, 16)) { + if (bch_is_zero(u->uuid, 16)) { struct closure cl; closure_init_stack(&cl); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 5c7e77073b1..4d9cca47e4c 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -105,9 
+105,9 @@ SHOW(__bch_cached_dev) #define var(stat) (dc->stat) if (attr == &sysfs_cache_mode) - return snprint_string_list(buf, PAGE_SIZE, - bch_cache_modes + 1, - BDEV_CACHE_MODE(&dc->sb)); + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_cache_modes + 1, + BDEV_CACHE_MODE(&dc->sb)); sysfs_printf(data_csum, "%i", dc->disk.data_csum); var_printf(verify, "%i"); @@ -126,10 +126,10 @@ SHOW(__bch_cached_dev) char dirty[20]; char derivative[20]; char target[20]; - hprint(dirty, + bch_hprint(dirty, atomic_long_read(&dc->disk.sectors_dirty) << 9); - hprint(derivative, dc->writeback_rate_derivative << 9); - hprint(target, dc->writeback_rate_target << 9); + bch_hprint(derivative, dc->writeback_rate_derivative << 9); + bch_hprint(target, dc->writeback_rate_target << 9); return sprintf(buf, "rate:\t\t%u\n" @@ -202,7 +202,7 @@ STORE(__cached_dev) bch_cached_dev_run(dc); if (attr == &sysfs_cache_mode) { - ssize_t v = read_string_list(buf, bch_cache_modes + 1); + ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1); if (v < 0) return v; @@ -224,7 +224,7 @@ STORE(__cached_dev) } if (attr == &sysfs_attach) { - if (parse_uuid(buf, dc->sb.set_uuid) < 16) + if (bch_parse_uuid(buf, dc->sb.set_uuid) < 16) return -EINVAL; list_for_each_entry(c, &bch_cache_sets, list) { @@ -657,9 +657,9 @@ SHOW(__bch_cache) ((size_t) ca->sb.nbuckets)); if (attr == &sysfs_cache_replacement_policy) - return snprint_string_list(buf, PAGE_SIZE, - cache_replacement_policies, - CACHE_REPLACEMENT(&ca->sb)); + return bch_snprint_string_list(buf, PAGE_SIZE, + cache_replacement_policies, + CACHE_REPLACEMENT(&ca->sb)); if (attr == &sysfs_priority_stats) { int cmp(const void *l, const void *r) @@ -747,7 +747,7 @@ STORE(__bch_cache) } if (attr == &sysfs_cache_replacement_policy) { - ssize_t v = read_string_list(buf, cache_replacement_policies); + ssize_t v = bch_read_string_list(buf, cache_replacement_policies); if (v < 0) return v; diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index 
34e4ba1184f..0526fe92a68 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -62,7 +62,7 @@ do { \ #define sysfs_hprint(file, val) \ do { \ if (attr == &sysfs_ ## file) { \ - ssize_t ret = hprint(buf, val); \ + ssize_t ret = bch_hprint(buf, val); \ strcat(buf, "\n"); \ return ret + 1; \ } \ diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dcec2e4f84a..22324d8b284 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -19,7 +19,7 @@ #define simple_strtouint(c, end, base) simple_strtoul(c, end, base) #define STRTO_H(name, type) \ -int name ## _h(const char *cp, type *res) \ +int bch_ ## name ## _h(const char *cp, type *res) \ { \ int u = 0; \ char *e; \ @@ -67,14 +67,13 @@ int name ## _h(const char *cp, type *res) \ *res = i; \ return 0; \ } \ -EXPORT_SYMBOL_GPL(name ## _h); STRTO_H(strtoint, int) STRTO_H(strtouint, unsigned int) STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) -ssize_t hprint(char *buf, int64_t v) +ssize_t bch_hprint(char *buf, int64_t v) { static const char units[] = "?kMGTPEZY"; char dec[3] = ""; @@ -93,9 +92,8 @@ ssize_t hprint(char *buf, int64_t v) return sprintf(buf, "%lli%s%c", v, dec, units[u]); } -EXPORT_SYMBOL_GPL(hprint); -ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], +ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], size_t selected) { char *out = buf; @@ -108,9 +106,8 @@ ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], out[-1] = '\n'; return out - buf; } -EXPORT_SYMBOL_GPL(snprint_string_list); -ssize_t read_string_list(const char *buf, const char * const list[]) +ssize_t bch_read_string_list(const char *buf, const char * const list[]) { size_t i; char *s, *d = kstrndup(buf, PAGE_SIZE - 1, GFP_KERNEL); @@ -130,9 +127,8 @@ ssize_t read_string_list(const char *buf, const char * const list[]) return i; } -EXPORT_SYMBOL_GPL(read_string_list); -bool is_zero(const char *p, 
size_t n) +bool bch_is_zero(const char *p, size_t n) { size_t i; @@ -141,9 +137,8 @@ bool is_zero(const char *p, size_t n) return false; return true; } -EXPORT_SYMBOL_GPL(is_zero); -int parse_uuid(const char *s, char *uuid) +int bch_parse_uuid(const char *s, char *uuid) { size_t i, j, x; memset(uuid, 0, 16); @@ -170,9 +165,8 @@ int parse_uuid(const char *s, char *uuid) } return i; } -EXPORT_SYMBOL_GPL(parse_uuid); -void time_stats_update(struct time_stats *stats, uint64_t start_time) +void bch_time_stats_update(struct time_stats *stats, uint64_t start_time) { uint64_t now = local_clock(); uint64_t duration = time_after64(now, start_time) @@ -195,9 +189,8 @@ void time_stats_update(struct time_stats *stats, uint64_t start_time) stats->last = now ?: 1; } -EXPORT_SYMBOL_GPL(time_stats_update); -unsigned next_delay(struct ratelimit *d, uint64_t done) +unsigned bch_next_delay(struct ratelimit *d, uint64_t done) { uint64_t now = local_clock(); @@ -207,9 +200,8 @@ unsigned next_delay(struct ratelimit *d, uint64_t done) ? 
div_u64(d->next - now, NSEC_PER_SEC / HZ) : 0; } -EXPORT_SYMBOL_GPL(next_delay); -void bio_map(struct bio *bio, void *base) +void bch_bio_map(struct bio *bio, void *base) { size_t size = bio->bi_size; struct bio_vec *bv = bio->bi_io_vec; @@ -235,9 +227,8 @@ start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, size -= bv->bv_len; } } -EXPORT_SYMBOL_GPL(bio_map); -int bio_alloc_pages(struct bio *bio, gfp_t gfp) +int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp) { int i; struct bio_vec *bv; @@ -253,7 +244,6 @@ int bio_alloc_pages(struct bio *bio, gfp_t gfp) return 0; } -EXPORT_SYMBOL_GPL(bio_alloc_pages); /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any @@ -365,7 +355,7 @@ static const uint64_t crc_table[256] = { 0x9AFCE626CE85B507 }; -uint64_t crc64_update(uint64_t crc, const void *_data, size_t len) +uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len) { const unsigned char *data = _data; @@ -376,14 +366,12 @@ uint64_t crc64_update(uint64_t crc, const void *_data, size_t len) return crc; } -EXPORT_SYMBOL(crc64_update); -uint64_t crc64(const void *data, size_t len) +uint64_t bch_crc64(const void *data, size_t len) { uint64_t crc = 0xffffffffffffffff; - crc = crc64_update(crc, data, len); + crc = bch_crc64_update(crc, data, len); return crc ^ 0xffffffffffffffff; } -EXPORT_SYMBOL(crc64); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 56705fdcc14..577393e38c3 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -307,42 +307,42 @@ do { \ #define ANYSINT_MAX(t) \ ((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1) -int strtoint_h(const char *, int *); -int strtouint_h(const char *, unsigned int *); -int strtoll_h(const char *, long long *); -int strtoull_h(const char *, unsigned long long *); +int bch_strtoint_h(const char *, int *); +int bch_strtouint_h(const char *, unsigned int *); +int bch_strtoll_h(const char *, long long *); +int bch_strtoull_h(const char *, 
unsigned long long *); -static inline int strtol_h(const char *cp, long *res) +static inline int bch_strtol_h(const char *cp, long *res) { #if BITS_PER_LONG == 32 - return strtoint_h(cp, (int *) res); + return bch_strtoint_h(cp, (int *) res); #else - return strtoll_h(cp, (long long *) res); + return bch_strtoll_h(cp, (long long *) res); #endif } -static inline int strtoul_h(const char *cp, long *res) +static inline int bch_strtoul_h(const char *cp, long *res) { #if BITS_PER_LONG == 32 - return strtouint_h(cp, (unsigned int *) res); + return bch_strtouint_h(cp, (unsigned int *) res); #else - return strtoull_h(cp, (unsigned long long *) res); + return bch_strtoull_h(cp, (unsigned long long *) res); #endif } #define strtoi_h(cp, res) \ (__builtin_types_compatible_p(typeof(*res), int) \ - ? strtoint_h(cp, (void *) res) \ + ? bch_strtoint_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), long) \ - ? strtol_h(cp, (void *) res) \ + ? bch_strtol_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), long long) \ - ? strtoll_h(cp, (void *) res) \ + ? bch_strtoll_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), unsigned int) \ - ? strtouint_h(cp, (void *) res) \ + ? bch_strtouint_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), unsigned long) \ - ? strtoul_h(cp, (void *) res) \ + ? bch_strtoul_h(cp, (void *) res) \ : __builtin_types_compatible_p(typeof(*res), unsigned long long)\ - ? strtoull_h(cp, (void *) res) : -EINVAL) + ? bch_strtoull_h(cp, (void *) res) : -EINVAL) #define strtoul_safe(cp, var) \ ({ \ @@ -379,15 +379,15 @@ static inline int strtoul_h(const char *cp, long *res) __builtin_types_compatible_p(typeof(var), const char *) \ ? 
"%s\n" : "%i\n", var) -ssize_t hprint(char *buf, int64_t v); +ssize_t bch_hprint(char *buf, int64_t v); -bool is_zero(const char *p, size_t n); -int parse_uuid(const char *s, char *uuid); +bool bch_is_zero(const char *p, size_t n); +int bch_parse_uuid(const char *s, char *uuid); -ssize_t snprint_string_list(char *buf, size_t size, const char * const list[], +ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[], size_t selected); -ssize_t read_string_list(const char *buf, const char * const list[]); +ssize_t bch_read_string_list(const char *buf, const char * const list[]); struct time_stats { /* @@ -400,7 +400,7 @@ struct time_stats { uint64_t last; }; -void time_stats_update(struct time_stats *stats, uint64_t time); +void bch_time_stats_update(struct time_stats *stats, uint64_t time); #define NSEC_PER_ns 1L #define NSEC_PER_us NSEC_PER_USEC @@ -462,7 +462,7 @@ static inline void ratelimit_reset(struct ratelimit *d) d->next = local_clock(); } -unsigned next_delay(struct ratelimit *d, uint64_t done); +unsigned bch_next_delay(struct ratelimit *d, uint64_t done); #define __DIV_SAFE(n, d, zero) \ ({ \ @@ -568,9 +568,9 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) #define bio_end(bio) ((bio)->bi_sector + bio_sectors(bio)) -void bio_map(struct bio *bio, void *base); +void bch_bio_map(struct bio *bio, void *base); -int bio_alloc_pages(struct bio *bio, gfp_t gfp); +int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp); static inline sector_t bdev_sectors(struct block_device *bdev) { @@ -583,7 +583,7 @@ do { \ bch_generic_make_request(bio, &(dev)->bio_split_hook); \ } while (0) -uint64_t crc64_update(uint64_t, const void *, size_t); -uint64_t crc64(const void *, size_t); +uint64_t bch_crc64_update(uint64_t, const void *, size_t); +uint64_t bch_crc64(const void *, size_t); #endif /* _BCACHE_UTIL_H */ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index a80ee5373fd..93e7e31a4bd 100644 --- 
a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -95,7 +95,7 @@ static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) !dc->writeback_percent) return 0; - return next_delay(&dc->writeback_rate, sectors * 10000000ULL); + return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); } /* Background writeback */ @@ -118,7 +118,7 @@ static void dirty_init(struct keybuf_key *w) bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS); bio->bi_private = w; bio->bi_io_vec = bio->bi_inline_vecs; - bio_map(bio, NULL); + bch_bio_map(bio, NULL); } static void refill_dirty(struct closure *cl) @@ -349,7 +349,7 @@ static void read_dirty(struct closure *cl) io->bio.bi_rw = READ; io->bio.bi_end_io = read_dirty_endio; - if (bio_alloc_pages(&io->bio, GFP_KERNEL)) + if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; pr_debug("%s", pkey(&w->key)); -- cgit v1.2.3-70-g09d2 From 2903381fce71004a7ce24d40fad53ba8236a3921 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Apr 2013 15:14:35 -0700 Subject: bcache: Take data offset from the bdev superblock. Add a new superblock version, and consolidate related defines. 
Signed-off-by: Gabriel de Perthuis Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 47 +++++++++++++++---- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/super.c | 108 +++++++++++++++++++++++++------------------- 3 files changed, 100 insertions(+), 57 deletions(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index f05723565f1..340146d7c17 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -223,11 +223,17 @@ struct bkey { #define BKEY_PADDED(key) \ union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; } -/* Version 1: Backing device +/* Version 0: Cache device + * Version 1: Backing device * Version 2: Seed pointer into btree node checksum - * Version 3: New UUID format + * Version 3: Cache device with new UUID format + * Version 4: Backing device with data offset */ -#define BCACHE_SB_VERSION 3 +#define BCACHE_SB_VERSION_CDEV 0 +#define BCACHE_SB_VERSION_BDEV 1 +#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 +#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 +#define BCACHE_SB_MAX_VERSION 4 #define SB_SECTOR 8 #define SB_SIZE 4096 @@ -236,13 +242,12 @@ struct bkey { /* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ #define MAX_CACHES_PER_SET 8 -#define BDEV_DATA_START 16 /* sectors */ +#define BDEV_DATA_START_DEFAULT 16 /* sectors */ struct cache_sb { uint64_t csum; uint64_t offset; /* sector where this sb was written */ uint64_t version; -#define CACHE_BACKING_DEV 1 uint8_t magic[16]; @@ -257,12 +262,28 @@ struct cache_sb { uint64_t seq; uint64_t pad[8]; - uint64_t nbuckets; /* device size */ - uint16_t block_size; /* sectors */ - uint16_t bucket_size; /* sectors */ + union { + struct { + /* Cache devices */ + uint64_t nbuckets; /* device size */ + + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ - uint16_t nr_in_set; - uint16_t nr_this_dev; + uint16_t nr_in_set; + uint16_t nr_this_dev; + }; + struct { + /* Backing devices 
*/ + uint64_t data_offset; + + /* + * block_size from the cache device section is still used by + * backing devices, so don't add anything here until we fix + * things to not need it for backing devices anymore + */ + }; + }; uint32_t last_mount; /* time_t */ @@ -861,6 +882,12 @@ static inline bool key_merging_disabled(struct cache_set *c) #endif } +static inline bool SB_IS_BDEV(const struct cache_sb *sb) +{ + return sb->version == BCACHE_SB_VERSION_BDEV + || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; +} + struct bbio { unsigned submit_time_us; union { diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 83731dc36f3..e5ff12e52d5 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1220,7 +1220,7 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) part_stat_unlock(); bio->bi_bdev = dc->bdev; - bio->bi_sector += BDEV_DATA_START; + bio->bi_sector += dc->sb.data_offset; if (cached_dev_get(dc)) { s = search_alloc(bio, d); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 5fa3cd2d9ff..f1e69f2fad3 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -110,15 +110,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, sb->flags = le64_to_cpu(s->flags); sb->seq = le64_to_cpu(s->seq); - - sb->nbuckets = le64_to_cpu(s->nbuckets); - sb->block_size = le16_to_cpu(s->block_size); - sb->bucket_size = le16_to_cpu(s->bucket_size); - - sb->nr_in_set = le16_to_cpu(s->nr_in_set); - sb->nr_this_dev = le16_to_cpu(s->nr_this_dev); sb->last_mount = le32_to_cpu(s->last_mount); - sb->first_bucket = le16_to_cpu(s->first_bucket); sb->keys = le16_to_cpu(s->keys); @@ -147,53 +139,77 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, if (bch_is_zero(sb->uuid, 16)) goto err; - err = "Unsupported superblock version"; - if (sb->version > BCACHE_SB_VERSION) - goto err; + switch (sb->version) { + case 
BCACHE_SB_VERSION_BDEV: + sb->block_size = le16_to_cpu(s->block_size); + sb->data_offset = BDEV_DATA_START_DEFAULT; + break; + case BCACHE_SB_VERSION_BDEV_WITH_OFFSET: + sb->block_size = le16_to_cpu(s->block_size); + sb->data_offset = le64_to_cpu(s->data_offset); + + err = "Bad data offset"; + if (sb->data_offset < BDEV_DATA_START_DEFAULT) + goto err; - err = "Bad block/bucket size"; - if (!is_power_of_2(sb->block_size) || sb->block_size > PAGE_SECTORS || - !is_power_of_2(sb->bucket_size) || sb->bucket_size < PAGE_SECTORS) - goto err; + break; + case BCACHE_SB_VERSION_CDEV: + case BCACHE_SB_VERSION_CDEV_WITH_UUID: + sb->nbuckets = le64_to_cpu(s->nbuckets); + sb->block_size = le16_to_cpu(s->block_size); + sb->bucket_size = le16_to_cpu(s->bucket_size); - err = "Too many buckets"; - if (sb->nbuckets > LONG_MAX) - goto err; + sb->nr_in_set = le16_to_cpu(s->nr_in_set); + sb->nr_this_dev = le16_to_cpu(s->nr_this_dev); - err = "Not enough buckets"; - if (sb->nbuckets < 1 << 7) - goto err; + err = "Too many buckets"; + if (sb->nbuckets > LONG_MAX) + goto err; - err = "Invalid superblock: device too small"; - if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets) - goto err; + err = "Not enough buckets"; + if (sb->nbuckets < 1 << 7) + goto err; - if (sb->version == CACHE_BACKING_DEV) - goto out; + err = "Bad block/bucket size"; + if (!is_power_of_2(sb->block_size) || + sb->block_size > PAGE_SECTORS || + !is_power_of_2(sb->bucket_size) || + sb->bucket_size < PAGE_SECTORS) + goto err; - err = "Bad UUID"; - if (bch_is_zero(sb->set_uuid, 16)) - goto err; + err = "Invalid superblock: device too small"; + if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets) + goto err; - err = "Bad cache device number in set"; - if (!sb->nr_in_set || - sb->nr_in_set <= sb->nr_this_dev || - sb->nr_in_set > MAX_CACHES_PER_SET) - goto err; + err = "Bad UUID"; + if (bch_is_zero(sb->set_uuid, 16)) + goto err; - err = "Journal buckets not sequential"; - for (i = 0; i < sb->keys; 
i++) - if (sb->d[i] != sb->first_bucket + i) + err = "Bad cache device number in set"; + if (!sb->nr_in_set || + sb->nr_in_set <= sb->nr_this_dev || + sb->nr_in_set > MAX_CACHES_PER_SET) goto err; - err = "Too many journal buckets"; - if (sb->first_bucket + sb->keys > sb->nbuckets) - goto err; + err = "Journal buckets not sequential"; + for (i = 0; i < sb->keys; i++) + if (sb->d[i] != sb->first_bucket + i) + goto err; - err = "Invalid superblock: first bucket comes before end of super"; - if (sb->first_bucket * sb->bucket_size < 16) + err = "Too many journal buckets"; + if (sb->first_bucket + sb->keys > sb->nbuckets) + goto err; + + err = "Invalid superblock: first bucket comes before end of super"; + if (sb->first_bucket * sb->bucket_size < 16) + goto err; + + break; + default: + err = "Unsupported superblock version"; goto err; -out: + } + sb->last_mount = get_seconds(); err = NULL; @@ -286,7 +302,7 @@ void bcache_write_super(struct cache_set *c) for_each_cache(ca, c, i) { struct bio *bio = &ca->sb_bio; - ca->sb.version = BCACHE_SB_VERSION; + ca->sb.version = BCACHE_SB_VERSION_CDEV_WITH_UUID; ca->sb.seq = c->sb.seq; ca->sb.last_mount = c->sb.last_mount; @@ -1049,7 +1065,7 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page, g = dc->disk.disk; - set_capacity(g, dc->bdev->bd_part->nr_sects - 16); + set_capacity(g, dc->bdev->bd_part->nr_sects - dc->sb.data_offset); bch_cached_dev_request_init(dc); @@ -1802,7 +1818,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (err) goto err_close; - if (sb->version == CACHE_BACKING_DEV) { + if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); err = register_bdev(sb, sb_page, bdev, dc); -- cgit v1.2.3-70-g09d2