From 90fa1527bddc7147dc0d590ee6184ca88bc50ecf Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 6 Jan 2009 03:04:54 +0000 Subject: dm snapshot: change yield to msleep Change yield() to msleep(1). If the thread had realtime priority, yield() doesn't really yield, so the yielding process would loop indefinitely and cause machine lockup. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-snap.c') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6c96db26b87..4ceedd4f22a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -735,7 +736,7 @@ static void snapshot_dtr(struct dm_target *ti) unregister_snapshot(s); while (atomic_read(&s->pending_exceptions_count)) - yield(); + msleep(1); /* * Ensure instructions in mempool_destroy aren't reordered * before atomic_read. @@ -888,10 +889,10 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) /* * Check for conflicting reads. This is extremely improbable, - * so yield() is sufficient and there is no need for a wait queue. + * so msleep(1) is sufficient and there is no need for a wait queue. */ while (__chunk_is_tracked(s, pe->e.old_chunk)) - yield(); + msleep(1); /* * Add a proper exception, and remove the -- cgit v1.2.3-70-g09d2 From 10d3bd09a3c25df114f74f7f86e1b58d070bef32 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 6 Jan 2009 03:04:58 +0000 Subject: dm: consolidate target deregistration error handling Change dm_unregister_target to return void and use BUG() for error reporting. dm_unregister_target can only fail because of programming bug in the target driver. It can't fail because of user's behavior or disk errors. This patch changes unregister_target to return void and use BUG if someone tries to unregister non-registered target or unregister target that is in use. This patch removes code duplication (testing of error codes in all dm targets) and reports bugs in just one place, in dm_unregister_target. In some target drivers, these return codes were ignored, which could lead to a situation where bugs could be missed. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 6 +----- drivers/md/dm-delay.c | 6 +----- drivers/md/dm-linear.c | 5 +---- drivers/md/dm-mpath.c | 6 +----- drivers/md/dm-raid1.c | 6 +----- drivers/md/dm-snap.c | 11 ++--------- drivers/md/dm-stripe.c | 4 +--- drivers/md/dm-target.c | 15 +++++++-------- drivers/md/dm-zero.c | 5 +---- include/linux/device-mapper.h | 6 ++++-- 10 files changed, 20 insertions(+), 50 deletions(-) (limited to 'drivers/md/dm-snap.c') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3326750ec02..35bda49796f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void) static void __exit dm_crypt_exit(void) { - int r = dm_unregister_target(&crypt_target); - - if (r < 0) - DMERR("unregister failed %d", r); - + dm_unregister_target(&crypt_target); kmem_cache_destroy(_crypt_io_pool); } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 848b381f117..59ee1b015d2 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -364,11 +364,7 @@ bad_queue: static void __exit dm_delay_exit(void) { - int r = dm_unregister_target(&delay_target); - - if (r < 0) - DMERR("unregister failed %d", r); - + dm_unregister_target(&delay_target); kmem_cache_destroy(delayed_cache); destroy_workqueue(kdelayd_wq); } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 44042becad8..79fb53e51c7 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -156,8 +156,5 @@ int __init dm_linear_init(void) void dm_linear_exit(void) { - int r = dm_unregister_target(&linear_target); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_target(&linear_target); } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3d7f4923cd1..345a26047ae 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void) static void __exit dm_multipath_exit(void) { - int r; - destroy_workqueue(kmpath_handlerd); destroy_workqueue(kmultipathd); - r = dm_unregister_target(&multipath_target); - if (r < 0) - DMERR("target unregister failed %d", r); + dm_unregister_target(&multipath_target); kmem_cache_destroy(_mpio_cache); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d0fed2b21b0..250f401668d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1300,11 +1300,7 @@ static int __init dm_mirror_init(void) static void __exit dm_mirror_exit(void) { - int r; - - r = dm_unregister_target(&mirror_target); - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_target(&mirror_target); } /* Module hooks */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4ceedd4f22a..a8005b43a06 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1470,17 +1470,10 @@ static int __init dm_snapshot_init(void) static void __exit dm_snapshot_exit(void) { - int r; - destroy_workqueue(ksnapd); - r = dm_unregister_target(&snapshot_target); - if (r) - DMERR("snapshot unregister failed %d", r); - - r = dm_unregister_target(&origin_target); - if (r) - DMERR("origin unregister failed %d", r); + dm_unregister_target(&snapshot_target); + dm_unregister_target(&origin_target); exit_origin_hash(); kmem_cache_destroy(pending_cache); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 9e4ef88d421..41569bc60ab 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -337,9 +337,7 @@ int __init dm_stripe_init(void) void dm_stripe_exit(void) { - if (dm_unregister_target(&stripe_target)) - DMWARN("target unregistration failed"); - + dm_unregister_target(&stripe_target); destroy_workqueue(kstriped); return; diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 835cf95b857..7decf10006e 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t) return rv; } -int dm_unregister_target(struct target_type *t) +void dm_unregister_target(struct target_type *t) { struct tt_internal *ti; down_write(&_lock); if (!(ti = __find_target_type(t->name))) { - up_write(&_lock); - return -EINVAL; + DMCRIT("Unregistering unrecognised target: %s", t->name); + BUG(); } if (ti->use) { - up_write(&_lock); - return -ETXTBSY; + DMCRIT("Attempt to unregister target still in use: %s", + t->name); + BUG(); } list_del(&ti->list); kfree(ti); up_write(&_lock); - return 0; } /* @@ -187,8 +187,7 @@ int __init dm_target_init(void) void dm_target_exit(void) { - if (dm_unregister_target(&error_target)) - DMWARN("error target unregistration failed"); + dm_unregister_target(&error_target); } EXPORT_SYMBOL(dm_register_target); diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cdbf126ec10..bbc97030c0c 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -69,10 +69,7 @@ static int __init dm_zero_init(void) static void __exit dm_zero_exit(void) { - int r = dm_unregister_target(&zero_target); - - if (r < 0) - DMERR("unregister failed %d", r); + dm_unregister_target(&zero_target); } module_init(dm_zero_init) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index c17fd334e57..89ff2df4024 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -157,8 +157,7 @@ struct dm_target { }; int dm_register_target(struct target_type *t); -int dm_unregister_target(struct target_type *t); - +void dm_unregister_target(struct target_type *t); /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. @@ -276,6 +275,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); *---------------------------------------------------------------*/ #define DM_NAME "device-mapper" +#define DMCRIT(f, arg...) \ + printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) + #define DMERR(f, arg...) \ printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) #define DMERR_LIMIT(f, arg...) \ -- cgit v1.2.3-70-g09d2 From aea53d92f70eeb00ae480e399a997dd55fd5055d Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 6 Jan 2009 03:05:15 +0000 Subject: dm snapshot: separate out exception store interface Pull structures that bridge the gap between snapshot and exception store out of dm-snap.h and put them in a new .h file - dm-exception-store.h. This file will define the API for new exception stores. Ultimately, dm-snap.h is unnecessary, since only dm-snap.c should be using it. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 1 + drivers/md/dm-exception-store.h | 131 ++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-snap.c | 1 + drivers/md/dm-snap.h | 121 +------------------------------------ 4 files changed, 134 insertions(+), 120 deletions(-) create mode 100644 drivers/md/dm-exception-store.h (limited to 'drivers/md/dm-snap.c') diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 01590f3e000..ef152e600cb 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -7,6 +7,7 @@ * This file is released under the GPL. */ +#include "dm-exception-store.h" #include "dm-snap.h" #include diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h new file mode 100644 index 00000000000..d75f775562e --- /dev/null +++ b/drivers/md/dm-exception-store.h @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * + * Device-mapper snapshot exception store. + * + * This file is released under the GPL. + */ + +#ifndef _LINUX_DM_EXCEPTION_STORE +#define _LINUX_DM_EXCEPTION_STORE + +#include + +/* + * The snapshot code deals with largish chunks of the disk at a + * time. Typically 32k - 512k. + */ +typedef sector_t chunk_t; + +/* + * An exception is used where an old chunk of data has been + * replaced by a new one. + * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number + * of chunks that follow contiguously. Remaining bits hold the number of the + * chunk within the device. + */ +struct dm_snap_exception { + struct list_head hash_list; + + chunk_t old_chunk; + chunk_t new_chunk; +}; + +/* + * Abstraction to handle the meta/layout of exception stores (the + * COW device). + */ +struct exception_store { + /* + * Destroys this object when you've finished with it. + */ + void (*destroy) (struct exception_store *store); + + /* + * The target shouldn't read the COW device until this is + * called. + */ + int (*read_metadata) (struct exception_store *store); + + /* + * Find somewhere to store the next exception. + */ + int (*prepare_exception) (struct exception_store *store, + struct dm_snap_exception *e); + + /* + * Update the metadata with this exception. + */ + void (*commit_exception) (struct exception_store *store, + struct dm_snap_exception *e, + void (*callback) (void *, int success), + void *callback_context); + + /* + * The snapshot is invalid, note this in the metadata. + */ + void (*drop_snapshot) (struct exception_store *store); + + /* + * Return how full the snapshot is. + */ + void (*fraction_full) (struct exception_store *store, + sector_t *numerator, + sector_t *denominator); + + struct dm_snapshot *snap; + void *context; +}; + +/* + * Funtions to manipulate consecutive chunks + */ +# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) +# define DM_CHUNK_CONSECUTIVE_BITS 8 +# define DM_CHUNK_NUMBER_BITS 56 + +static inline chunk_t dm_chunk_number(chunk_t chunk) +{ + return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); +} + +static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) +{ + return e->new_chunk >> DM_CHUNK_NUMBER_BITS; +} + +static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) +{ + e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); + + BUG_ON(!dm_consecutive_chunk_count(e)); +} + +# else +# define DM_CHUNK_CONSECUTIVE_BITS 0 + +static inline chunk_t dm_chunk_number(chunk_t chunk) +{ + return chunk; +} + +static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) +{ + return 0; +} + +static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) +{ +} + +# endif + +/* + * Two exception store implementations. + */ +int dm_create_persistent(struct exception_store *store); + +int dm_create_transient(struct exception_store *store); + +#endif /* _LINUX_DM_EXCEPTION_STORE */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a8005b43a06..81f03a0e783 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -21,6 +21,7 @@ #include #include +#include "dm-exception-store.h" #include "dm-snap.h" #include "dm-bio-list.h" diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 99c0106ede2..6e4beaf89f6 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -1,6 +1,4 @@ /* - * dm-snapshot.c - * * Copyright (C) 2001-2002 Sistina Software (UK) Limited. * * This file is released under the GPL. @@ -10,6 +8,7 @@ #define DM_SNAPSHOT_H #include +#include "dm-exception-store.h" #include "dm-bio-list.h" #include #include @@ -20,116 +19,6 @@ struct exception_table { struct list_head *table; }; -/* - * The snapshot code deals with largish chunks of the disk at a - * time. Typically 32k - 512k. - */ -typedef sector_t chunk_t; - -/* - * An exception is used where an old chunk of data has been - * replaced by a new one. - * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number - * of chunks that follow contiguously. Remaining bits hold the number of the - * chunk within the device. - */ -struct dm_snap_exception { - struct list_head hash_list; - - chunk_t old_chunk; - chunk_t new_chunk; -}; - -/* - * Funtions to manipulate consecutive chunks - */ -# if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) -# define DM_CHUNK_CONSECUTIVE_BITS 8 -# define DM_CHUNK_NUMBER_BITS 56 - -static inline chunk_t dm_chunk_number(chunk_t chunk) -{ - return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); -} - -static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) -{ - return e->new_chunk >> DM_CHUNK_NUMBER_BITS; -} - -static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) -{ - e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); - - BUG_ON(!dm_consecutive_chunk_count(e)); -} - -# else -# define DM_CHUNK_CONSECUTIVE_BITS 0 - -static inline chunk_t dm_chunk_number(chunk_t chunk) -{ - return chunk; -} - -static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) -{ - return 0; -} - -static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) -{ -} - -# endif - -/* - * Abstraction to handle the meta/layout of exception stores (the - * COW device). - */ -struct exception_store { - - /* - * Destroys this object when you've finished with it. - */ - void (*destroy) (struct exception_store *store); - - /* - * The target shouldn't read the COW device until this is - * called. - */ - int (*read_metadata) (struct exception_store *store); - - /* - * Find somewhere to store the next exception. - */ - int (*prepare_exception) (struct exception_store *store, - struct dm_snap_exception *e); - - /* - * Update the metadata with this exception. - */ - void (*commit_exception) (struct exception_store *store, - struct dm_snap_exception *e, - void (*callback) (void *, int success), - void *callback_context); - - /* - * The snapshot is invalid, note this in the metadata. - */ - void (*drop_snapshot) (struct exception_store *store); - - /* - * Return how full the snapshot is. - */ - void (*fraction_full) (struct exception_store *store, - sector_t *numerator, - sector_t *denominator); - - struct dm_snapshot *snap; - void *context; -}; - #define DM_TRACKED_CHUNK_HASH_SIZE 16 #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ (DM_TRACKED_CHUNK_HASH_SIZE - 1)) @@ -192,14 +81,6 @@ struct dm_snapshot { */ int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); -/* - * Constructor and destructor for the default persistent - * store. - */ -int dm_create_persistent(struct exception_store *store); - -int dm_create_transient(struct exception_store *store); - /* * Return the number of sectors in the device. */ -- cgit v1.2.3-70-g09d2 From 4db6bfe02bdc7dc5048f46dd682a94801d029adc Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Tue, 6 Jan 2009 03:05:17 +0000 Subject: dm snapshot: split out exception store implementations Move the existing snapshot exception store implementations out into separate files. Later patches will place these behind a new interface in preparation for alternative implementations. Signed-off-by: Alasdair G Kergon --- drivers/md/Makefile | 3 +- drivers/md/dm-exception-store.c | 749 +--------------------------------------- drivers/md/dm-exception-store.h | 9 + drivers/md/dm-snap-persistent.c | 694 +++++++++++++++++++++++++++++++++++++ drivers/md/dm-snap-transient.c | 95 +++++ drivers/md/dm-snap.c | 20 +- 6 files changed, 833 insertions(+), 737 deletions(-) create mode 100644 drivers/md/dm-snap-persistent.c create mode 100644 drivers/md/dm-snap-transient.c (limited to 'drivers/md/dm-snap.c') diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 63f0ae94415..72880b7e28d 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -5,7 +5,8 @@ dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-multipath-objs := dm-path-selector.o dm-mpath.o -dm-snapshot-objs := dm-snap.o dm-exception-store.o +dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \ + dm-snap-persistent.o dm-mirror-objs := dm-raid1.o md-mod-objs := md.o bitmap.o raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index c5c9a265231..74777e0f80d 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -1,757 +1,46 @@ /* - * dm-exception-store.c - * * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * Copyright (C) 2006 Red Hat GmbH + * Copyright (C) 2006-2008 Red Hat GmbH * * This file is released under the GPL. */ #include "dm-exception-store.h" -#include "dm-snap.h" #include #include #include #include -#include -#include - -#define DM_MSG_PREFIX "snapshots" -#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ - -/*----------------------------------------------------------------- - * Persistent snapshots, by persistent we mean that the snapshot - * will survive a reboot. - *---------------------------------------------------------------*/ - -/* - * We need to store a record of which parts of the origin have - * been copied to the snapshot device. The snapshot code - * requires that we copy exception chunks to chunk aligned areas - * of the COW store. It makes sense therefore, to store the - * metadata in chunk size blocks. - * - * There is no backward or forward compatibility implemented, - * snapshots with different disk versions than the kernel will - * not be usable. It is expected that "lvcreate" will blank out - * the start of a fresh COW device before calling the snapshot - * constructor. - * - * The first chunk of the COW device just contains the header. - * After this there is a chunk filled with exception metadata, - * followed by as many exception chunks as can fit in the - * metadata areas. - * - * All on disk structures are in little-endian format. The end - * of the exceptions info is indicated by an exception with a - * new_chunk of 0, which is invalid since it would point to the - * header chunk. - */ - -/* - * Magic for persistent snapshots: "SnAp" - Feeble isn't it. - */ -#define SNAP_MAGIC 0x70416e53 - -/* - * The on-disk version of the metadata. - */ -#define SNAPSHOT_DISK_VERSION 1 - -struct disk_header { - uint32_t magic; - - /* - * Is this snapshot valid. There is no way of recovering - * an invalid snapshot. - */ - uint32_t valid; - - /* - * Simple, incrementing version. no backward - * compatibility. - */ - uint32_t version; - - /* In sectors */ - uint32_t chunk_size; -}; - -struct disk_exception { - uint64_t old_chunk; - uint64_t new_chunk; -}; - -struct commit_callback { - void (*callback)(void *, int success); - void *context; -}; - -/* - * The top level structure for a persistent exception store. - */ -struct pstore { - struct dm_snapshot *snap; /* up pointer to my snapshot */ - int version; - int valid; - uint32_t exceptions_per_area; - - /* - * Now that we have an asynchronous kcopyd there is no - * need for large chunk sizes, so it wont hurt to have a - * whole chunks worth of metadata in memory at once. - */ - void *area; - - /* - * An area of zeros used to clear the next area. - */ - void *zero_area; - - /* - * Used to keep track of which metadata area the data in - * 'chunk' refers to. - */ - chunk_t current_area; - - /* - * The next free chunk for an exception. - */ - chunk_t next_free; - - /* - * The index of next free exception in the current - * metadata area. - */ - uint32_t current_committed; - - atomic_t pending_count; - uint32_t callback_count; - struct commit_callback *callbacks; - struct dm_io_client *io_client; - - struct workqueue_struct *metadata_wq; -}; - -static unsigned sectors_to_pages(unsigned sectors) -{ - return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); -} - -static int alloc_area(struct pstore *ps) -{ - int r = -ENOMEM; - size_t len; - - len = ps->snap->chunk_size << SECTOR_SHIFT; - - /* - * Allocate the chunk_size block of memory that will hold - * a single metadata area. - */ - ps->area = vmalloc(len); - if (!ps->area) - return r; - - ps->zero_area = vmalloc(len); - if (!ps->zero_area) { - vfree(ps->area); - return r; - } - memset(ps->zero_area, 0, len); - - return 0; -} - -static void free_area(struct pstore *ps) -{ - vfree(ps->area); - ps->area = NULL; - vfree(ps->zero_area); - ps->zero_area = NULL; -} - -struct mdata_req { - struct dm_io_region *where; - struct dm_io_request *io_req; - struct work_struct work; - int result; -}; - -static void do_metadata(struct work_struct *work) -{ - struct mdata_req *req = container_of(work, struct mdata_req, work); - - req->result = dm_io(req->io_req, 1, req->where, NULL); -} - -/* - * Read or write a chunk aligned and sized block of data from a device. - */ -static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) -{ - struct dm_io_region where = { - .bdev = ps->snap->cow->bdev, - .sector = ps->snap->chunk_size * chunk, - .count = ps->snap->chunk_size, - }; - struct dm_io_request io_req = { - .bi_rw = rw, - .mem.type = DM_IO_VMA, - .mem.ptr.vma = ps->area, - .client = ps->io_client, - .notify.fn = NULL, - }; - struct mdata_req req; - - if (!metadata) - return dm_io(&io_req, 1, &where, NULL); - - req.where = &where; - req.io_req = &io_req; - - /* - * Issue the synchronous I/O from a different thread - * to avoid generic_make_request recursion. - */ - INIT_WORK(&req.work, do_metadata); - queue_work(ps->metadata_wq, &req.work); - flush_workqueue(ps->metadata_wq); - - return req.result; -} - -/* - * Convert a metadata area index to a chunk index. - */ -static chunk_t area_location(struct pstore *ps, chunk_t area) -{ - return 1 + ((ps->exceptions_per_area + 1) * area); -} - -/* - * Read or write a metadata area. Remembering to skip the first - * chunk which holds the header. - */ -static int area_io(struct pstore *ps, int rw) -{ - int r; - chunk_t chunk; - - chunk = area_location(ps, ps->current_area); - - r = chunk_io(ps, chunk, rw, 0); - if (r) - return r; - - return 0; -} - -static void zero_memory_area(struct pstore *ps) -{ - memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); -} - -static int zero_disk_area(struct pstore *ps, chunk_t area) -{ - struct dm_io_region where = { - .bdev = ps->snap->cow->bdev, - .sector = ps->snap->chunk_size * area_location(ps, area), - .count = ps->snap->chunk_size, - }; - struct dm_io_request io_req = { - .bi_rw = WRITE, - .mem.type = DM_IO_VMA, - .mem.ptr.vma = ps->zero_area, - .client = ps->io_client, - .notify.fn = NULL, - }; - - return dm_io(&io_req, 1, &where, NULL); -} - -static int read_header(struct pstore *ps, int *new_snapshot) -{ - int r; - struct disk_header *dh; - chunk_t chunk_size; - int chunk_size_supplied = 1; - - /* - * Use default chunk size (or hardsect_size, if larger) if none supplied - */ - if (!ps->snap->chunk_size) { - ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, - bdev_hardsect_size(ps->snap->cow->bdev) >> 9); - ps->snap->chunk_mask = ps->snap->chunk_size - 1; - ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; - chunk_size_supplied = 0; - } - - ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> - chunk_size)); - if (IS_ERR(ps->io_client)) - return PTR_ERR(ps->io_client); - - r = alloc_area(ps); - if (r) - return r; - - r = chunk_io(ps, 0, READ, 1); - if (r) - goto bad; - - dh = (struct disk_header *) ps->area; - - if (le32_to_cpu(dh->magic) == 0) { - *new_snapshot = 1; - return 0; - } - - if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { - DMWARN("Invalid or corrupt snapshot"); - r = -ENXIO; - goto bad; - } - - *new_snapshot = 0; - ps->valid = le32_to_cpu(dh->valid); - ps->version = le32_to_cpu(dh->version); - chunk_size = le32_to_cpu(dh->chunk_size); - - if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) - return 0; - - DMWARN("chunk size %llu in device metadata overrides " - "table chunk size of %llu.", - (unsigned long long)chunk_size, - (unsigned long long)ps->snap->chunk_size); - - /* We had a bogus chunk_size. Fix stuff up. */ - free_area(ps); - - ps->snap->chunk_size = chunk_size; - ps->snap->chunk_mask = chunk_size - 1; - ps->snap->chunk_shift = ffs(chunk_size) - 1; - - r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), - ps->io_client); - if (r) - return r; - - r = alloc_area(ps); - return r; - -bad: - free_area(ps); - return r; -} - -static int write_header(struct pstore *ps) -{ - struct disk_header *dh; - - memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); - - dh = (struct disk_header *) ps->area; - dh->magic = cpu_to_le32(SNAP_MAGIC); - dh->valid = cpu_to_le32(ps->valid); - dh->version = cpu_to_le32(ps->version); - dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); - - return chunk_io(ps, 0, WRITE, 1); -} - -/* - * Access functions for the disk exceptions, these do the endian conversions. - */ -static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) -{ - BUG_ON(index >= ps->exceptions_per_area); - - return ((struct disk_exception *) ps->area) + index; -} +#include -static void read_exception(struct pstore *ps, - uint32_t index, struct disk_exception *result) -{ - struct disk_exception *e = get_exception(ps, index); - - /* copy it */ - result->old_chunk = le64_to_cpu(e->old_chunk); - result->new_chunk = le64_to_cpu(e->new_chunk); -} - -static void write_exception(struct pstore *ps, - uint32_t index, struct disk_exception *de) -{ - struct disk_exception *e = get_exception(ps, index); - - /* copy it */ - e->old_chunk = cpu_to_le64(de->old_chunk); - e->new_chunk = cpu_to_le64(de->new_chunk); -} +#define DM_MSG_PREFIX "snapshot exception stores" -/* - * Registers the exceptions that are present in the current area. - * 'full' is filled in to indicate if the area has been - * filled. - */ -static int insert_exceptions(struct pstore *ps, int *full) +int dm_exception_store_init(void) { int r; - unsigned int i; - struct disk_exception de; - - /* presume the area is full */ - *full = 1; - - for (i = 0; i < ps->exceptions_per_area; i++) { - read_exception(ps, i, &de); - - /* - * If the new_chunk is pointing at the start of - * the COW device, where the first metadata area - * is we know that we've hit the end of the - * exceptions. Therefore the area is not full. - */ - if (de.new_chunk == 0LL) { - ps->current_committed = i; - *full = 0; - break; - } - - /* - * Keep track of the start of the free chunks. - */ - if (ps->next_free <= de.new_chunk) - ps->next_free = de.new_chunk + 1; - - /* - * Otherwise we add the exception to the snapshot. - */ - r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); - if (r) - return r; - } - - return 0; -} - -static int read_exceptions(struct pstore *ps) -{ - int r, full = 1; - - /* - * Keeping reading chunks and inserting exceptions until - * we find a partially full area. - */ - for (ps->current_area = 0; full; ps->current_area++) { - r = area_io(ps, READ); - if (r) - return r; - r = insert_exceptions(ps, &full); - if (r) - return r; + r = dm_transient_snapshot_init(); + if (r) { + DMERR("Unable to register transient exception store type."); + goto transient_fail; } - ps->current_area--; - - return 0; -} - -static struct pstore *get_info(struct dm_exception_store *store) -{ - return (struct pstore *) store->context; -} - -static void persistent_fraction_full(struct dm_exception_store *store, - sector_t *numerator, sector_t *denominator) -{ - *numerator = get_info(store)->next_free * store->snap->chunk_size; - *denominator = get_dev_size(store->snap->cow->bdev); -} - -static void persistent_destroy(struct dm_exception_store *store) -{ - struct pstore *ps = get_info(store); - - destroy_workqueue(ps->metadata_wq); - dm_io_client_destroy(ps->io_client); - vfree(ps->callbacks); - free_area(ps); - kfree(ps); -} - -static int persistent_read_metadata(struct dm_exception_store *store) -{ - int r, uninitialized_var(new_snapshot); - struct pstore *ps = get_info(store); - - /* - * Read the snapshot header. - */ - r = read_header(ps, &new_snapshot); - if (r) - return r; - - /* - * Now we know correct chunk_size, complete the initialisation. - */ - ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / - sizeof(struct disk_exception); - ps->callbacks = dm_vcalloc(ps->exceptions_per_area, - sizeof(*ps->callbacks)); - if (!ps->callbacks) - return -ENOMEM; - - /* - * Do we need to setup a new snapshot ? - */ - if (new_snapshot) { - r = write_header(ps); - if (r) { - DMWARN("write_header failed"); - return r; - } - - ps->current_area = 0; - zero_memory_area(ps); - r = zero_disk_area(ps, 0); - if (r) { - DMWARN("zero_disk_area(0) failed"); - return r; - } - } else { - /* - * Sanity checks. - */ - if (ps->version != SNAPSHOT_DISK_VERSION) { - DMWARN("unable to handle snapshot disk version %d", - ps->version); - return -EINVAL; - } - - /* - * Metadata are valid, but snapshot is invalidated - */ - if (!ps->valid) - return 1; - - /* - * Read the metadata. - */ - r = read_exceptions(ps); - if (r) - return r; + r = dm_persistent_snapshot_init(); + if (r) { + DMERR("Unable to register persistent exception store type"); + goto persistent_fail; } return 0; -} - -static int persistent_prepare(struct dm_exception_store *store, - struct dm_snap_exception *e) -{ - struct pstore *ps = get_info(store); - uint32_t stride; - chunk_t next_free; - sector_t size = get_dev_size(store->snap->cow->bdev); - - /* Is there enough room ? */ - if (size < ((ps->next_free + 1) * store->snap->chunk_size)) - return -ENOSPC; - e->new_chunk = ps->next_free; - - /* - * Move onto the next free pending, making sure to take - * into account the location of the metadata chunks. - */ - stride = (ps->exceptions_per_area + 1); - next_free = ++ps->next_free; - if (sector_div(next_free, stride) == 1) - ps->next_free++; - - atomic_inc(&ps->pending_count); - return 0; -} - -static void persistent_commit(struct dm_exception_store *store, - struct dm_snap_exception *e, - void (*callback) (void *, int success), - void *callback_context) -{ - unsigned int i; - struct pstore *ps = get_info(store); - struct disk_exception de; - struct commit_callback *cb; - - de.old_chunk = e->old_chunk; - de.new_chunk = e->new_chunk; - write_exception(ps, ps->current_committed++, &de); - - /* - * Add the callback to the back of the array. This code - * is the only place where the callback array is - * manipulated, and we know that it will never be called - * multiple times concurrently. - */ - cb = ps->callbacks + ps->callback_count++; - cb->callback = callback; - cb->context = callback_context; - - /* - * If there are exceptions in flight and we have not yet - * filled this metadata area there's nothing more to do. - */ - if (!atomic_dec_and_test(&ps->pending_count) && - (ps->current_committed != ps->exceptions_per_area)) - return; - - /* - * If we completely filled the current area, then wipe the next one. - */ - if ((ps->current_committed == ps->exceptions_per_area) && - zero_disk_area(ps, ps->current_area + 1)) - ps->valid = 0; - - /* - * Commit exceptions to disk. - */ - if (ps->valid && area_io(ps, WRITE)) - ps->valid = 0; - - /* - * Advance to the next area if this one is full. - */ - if (ps->current_committed == ps->exceptions_per_area) { - ps->current_committed = 0; - ps->current_area++; - zero_memory_area(ps); - } - - for (i = 0; i < ps->callback_count; i++) { - cb = ps->callbacks + i; - cb->callback(cb->context, ps->valid); - } - - ps->callback_count = 0; -} - -static void persistent_drop(struct dm_exception_store *store) -{ - struct pstore *ps = get_info(store); - - ps->valid = 0; - if (write_header(ps)) - DMWARN("write header failed"); -} - -int dm_create_persistent(struct dm_exception_store *store) -{ - struct pstore *ps; - - /* allocate the pstore */ - ps = kmalloc(sizeof(*ps), GFP_KERNEL); - if (!ps) - return -ENOMEM; - - ps->snap = store->snap; - ps->valid = 1; - ps->version = SNAPSHOT_DISK_VERSION; - ps->area = NULL; - ps->next_free = 2; /* skipping the header and first area */ - ps->current_committed = 0; - - ps->callback_count = 0; - atomic_set(&ps->pending_count, 0); - ps->callbacks = NULL; - - ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); - if (!ps->metadata_wq) { - kfree(ps); - DMERR("couldn't start header metadata update thread"); - return -ENOMEM; - } - - store->destroy = persistent_destroy; - store->read_metadata = persistent_read_metadata; - store->prepare_exception = persistent_prepare; - store->commit_exception = persistent_commit; - store->drop_snapshot = persistent_drop; - store->fraction_full = persistent_fraction_full; - store->context = ps; - - return 0; -} - -/*----------------------------------------------------------------- - * Implementation of the store for non-persistent snapshots. - *---------------------------------------------------------------*/ -struct transient_c { - sector_t next_free; -}; - -static void transient_destroy(struct dm_exception_store *store) -{ - kfree(store->context); -} - -static int transient_read_metadata(struct dm_exception_store *store) -{ - return 0; -} - -static int transient_prepare(struct dm_exception_store *store, - struct dm_snap_exception *e) -{ - struct transient_c *tc = (struct transient_c *) store->context; - sector_t size = get_dev_size(store->snap->cow->bdev); - - if (size < (tc->next_free + store->snap->chunk_size)) - return -1; - - e->new_chunk = sector_to_chunk(store->snap, tc->next_free); - tc->next_free += store->snap->chunk_size; - - return 0; -} - -static void transient_commit(struct dm_exception_store *store, - struct dm_snap_exception *e, - void (*callback) (void *, int success), - void *callback_context) -{ - /* Just succeed */ - callback(callback_context, 1); -} - -static void transient_fraction_full(struct dm_exception_store *store, - sector_t *numerator, sector_t *denominator) -{ - *numerator = ((struct transient_c *) store->context)->next_free; - *denominator = get_dev_size(store->snap->cow->bdev); +persistent_fail: + dm_persistent_snapshot_exit(); +transient_fail: + return r; } -int dm_create_transient(struct dm_exception_store *store) +void dm_exception_store_exit(void) { - struct transient_c *tc; - - store->destroy = transient_destroy; - store->read_metadata = transient_read_metadata; - store->prepare_exception = transient_prepare; - store->commit_exception = transient_commit; - store->drop_snapshot = NULL; - store->fraction_full = transient_fraction_full; - - tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); - if (!tc) - return -ENOMEM; - - tc->next_free = 0; - store->context = tc; - - return 0; + dm_persistent_snapshot_exit(); + dm_transient_snapshot_exit(); } diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 25677df8dd5..78d1acec77e 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -122,9 +122,18 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) # endif +int dm_exception_store_init(void); +void dm_exception_store_exit(void); + /* * Two exception store implementations. */ +int dm_persistent_snapshot_init(void); +void dm_persistent_snapshot_exit(void); + +int dm_transient_snapshot_init(void); +void dm_transient_snapshot_exit(void); + int dm_create_persistent(struct dm_exception_store *store); int dm_create_transient(struct dm_exception_store *store); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c new file mode 100644 index 00000000000..57c946c69ee --- /dev/null +++ b/drivers/md/dm-snap-persistent.c @@ -0,0 +1,694 @@ +/* + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. + * Copyright (C) 2006-2008 Red Hat GmbH + * + * This file is released under the GPL. + */ + +#include "dm-exception-store.h" +#include "dm-snap.h" + +#include +#include +#include +#include +#include + +#define DM_MSG_PREFIX "persistent snapshot" +#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ + +/*----------------------------------------------------------------- + * Persistent snapshots, by persistent we mean that the snapshot + * will survive a reboot. + *---------------------------------------------------------------*/ + +/* + * We need to store a record of which parts of the origin have + * been copied to the snapshot device. The snapshot code + * requires that we copy exception chunks to chunk aligned areas + * of the COW store. It makes sense therefore, to store the + * metadata in chunk size blocks. + * + * There is no backward or forward compatibility implemented, + * snapshots with different disk versions than the kernel will + * not be usable. It is expected that "lvcreate" will blank out + * the start of a fresh COW device before calling the snapshot + * constructor. + * + * The first chunk of the COW device just contains the header. + * After this there is a chunk filled with exception metadata, + * followed by as many exception chunks as can fit in the + * metadata areas. + * + * All on disk structures are in little-endian format. The end + * of the exceptions info is indicated by an exception with a + * new_chunk of 0, which is invalid since it would point to the + * header chunk. + */ + +/* + * Magic for persistent snapshots: "SnAp" - Feeble isn't it. + */ +#define SNAP_MAGIC 0x70416e53 + +/* + * The on-disk version of the metadata. + */ +#define SNAPSHOT_DISK_VERSION 1 + +struct disk_header { + uint32_t magic; + + /* + * Is this snapshot valid. There is no way of recovering + * an invalid snapshot. + */ + uint32_t valid; + + /* + * Simple, incrementing version. no backward + * compatibility. + */ + uint32_t version; + + /* In sectors */ + uint32_t chunk_size; +}; + +struct disk_exception { + uint64_t old_chunk; + uint64_t new_chunk; +}; + +struct commit_callback { + void (*callback)(void *, int success); + void *context; +}; + +/* + * The top level structure for a persistent exception store. + */ +struct pstore { + struct dm_snapshot *snap; /* up pointer to my snapshot */ + int version; + int valid; + uint32_t exceptions_per_area; + + /* + * Now that we have an asynchronous kcopyd there is no + * need for large chunk sizes, so it wont hurt to have a + * whole chunks worth of metadata in memory at once. + */ + void *area; + + /* + * An area of zeros used to clear the next area. + */ + void *zero_area; + + /* + * Used to keep track of which metadata area the data in + * 'chunk' refers to. + */ + chunk_t current_area; + + /* + * The next free chunk for an exception. + */ + chunk_t next_free; + + /* + * The index of next free exception in the current + * metadata area. + */ + uint32_t current_committed; + + atomic_t pending_count; + uint32_t callback_count; + struct commit_callback *callbacks; + struct dm_io_client *io_client; + + struct workqueue_struct *metadata_wq; +}; + +static unsigned sectors_to_pages(unsigned sectors) +{ + return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); +} + +static int alloc_area(struct pstore *ps) +{ + int r = -ENOMEM; + size_t len; + + len = ps->snap->chunk_size << SECTOR_SHIFT; + + /* + * Allocate the chunk_size block of memory that will hold + * a single metadata area. + */ + ps->area = vmalloc(len); + if (!ps->area) + return r; + + ps->zero_area = vmalloc(len); + if (!ps->zero_area) { + vfree(ps->area); + return r; + } + memset(ps->zero_area, 0, len); + + return 0; +} + +static void free_area(struct pstore *ps) +{ + vfree(ps->area); + ps->area = NULL; + vfree(ps->zero_area); + ps->zero_area = NULL; +} + +struct mdata_req { + struct dm_io_region *where; + struct dm_io_request *io_req; + struct work_struct work; + int result; +}; + +static void do_metadata(struct work_struct *work) +{ + struct mdata_req *req = container_of(work, struct mdata_req, work); + + req->result = dm_io(req->io_req, 1, req->where, NULL); +} + +/* + * Read or write a chunk aligned and sized block of data from a device. + */ +static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) +{ + struct dm_io_region where = { + .bdev = ps->snap->cow->bdev, + .sector = ps->snap->chunk_size * chunk, + .count = ps->snap->chunk_size, + }; + struct dm_io_request io_req = { + .bi_rw = rw, + .mem.type = DM_IO_VMA, + .mem.ptr.vma = ps->area, + .client = ps->io_client, + .notify.fn = NULL, + }; + struct mdata_req req; + + if (!metadata) + return dm_io(&io_req, 1, &where, NULL); + + req.where = &where; + req.io_req = &io_req; + + /* + * Issue the synchronous I/O from a different thread + * to avoid generic_make_request recursion. + */ + INIT_WORK(&req.work, do_metadata); + queue_work(ps->metadata_wq, &req.work); + flush_workqueue(ps->metadata_wq); + + return req.result; +} + +/* + * Convert a metadata area index to a chunk index. + */ +static chunk_t area_location(struct pstore *ps, chunk_t area) +{ + return 1 + ((ps->exceptions_per_area + 1) * area); +} + +/* + * Read or write a metadata area. Remembering to skip the first + * chunk which holds the header. + */ +static int area_io(struct pstore *ps, int rw) +{ + int r; + chunk_t chunk; + + chunk = area_location(ps, ps->current_area); + + r = chunk_io(ps, chunk, rw, 0); + if (r) + return r; + + return 0; +} + +static void zero_memory_area(struct pstore *ps) +{ + memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); +} + +static int zero_disk_area(struct pstore *ps, chunk_t area) +{ + struct dm_io_region where = { + .bdev = ps->snap->cow->bdev, + .sector = ps->snap->chunk_size * area_location(ps, area), + .count = ps->snap->chunk_size, + }; + struct dm_io_request io_req = { + .bi_rw = WRITE, + .mem.type = DM_IO_VMA, + .mem.ptr.vma = ps->zero_area, + .client = ps->io_client, + .notify.fn = NULL, + }; + + return dm_io(&io_req, 1, &where, NULL); +} + +static int read_header(struct pstore *ps, int *new_snapshot) +{ + int r; + struct disk_header *dh; + chunk_t chunk_size; + int chunk_size_supplied = 1; + + /* + * Use default chunk size (or hardsect_size, if larger) if none supplied + */ + if (!ps->snap->chunk_size) { + ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, + bdev_hardsect_size(ps->snap->cow->bdev) >> 9); + ps->snap->chunk_mask = ps->snap->chunk_size - 1; + ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; + chunk_size_supplied = 0; + } + + ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> + chunk_size)); + if (IS_ERR(ps->io_client)) + return PTR_ERR(ps->io_client); + + r = alloc_area(ps); + if (r) + return r; + + r = chunk_io(ps, 0, READ, 1); + if (r) + goto bad; + + dh = (struct disk_header *) ps->area; + + if (le32_to_cpu(dh->magic) == 0) { + *new_snapshot = 1; + return 0; + } + + if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { + DMWARN("Invalid or corrupt snapshot"); + r = -ENXIO; + goto bad; + } + + *new_snapshot = 0; + ps->valid = le32_to_cpu(dh->valid); + ps->version = le32_to_cpu(dh->version); + chunk_size = le32_to_cpu(dh->chunk_size); + + if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) + return 0; + + DMWARN("chunk size %llu in device metadata overrides " + "table chunk size of %llu.", + (unsigned long long)chunk_size, + (unsigned long long)ps->snap->chunk_size); + + /* We had a bogus chunk_size. Fix stuff up. */ + free_area(ps); + + ps->snap->chunk_size = chunk_size; + ps->snap->chunk_mask = chunk_size - 1; + ps->snap->chunk_shift = ffs(chunk_size) - 1; + + r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), + ps->io_client); + if (r) + return r; + + r = alloc_area(ps); + return r; + +bad: + free_area(ps); + return r; +} + +static int write_header(struct pstore *ps) +{ + struct disk_header *dh; + + memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); + + dh = (struct disk_header *) ps->area; + dh->magic = cpu_to_le32(SNAP_MAGIC); + dh->valid = cpu_to_le32(ps->valid); + dh->version = cpu_to_le32(ps->version); + dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); + + return chunk_io(ps, 0, WRITE, 1); +} + +/* + * Access functions for the disk exceptions, these do the endian conversions. + */ +static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) +{ + BUG_ON(index >= ps->exceptions_per_area); + + return ((struct disk_exception *) ps->area) + index; +} + +static void read_exception(struct pstore *ps, + uint32_t index, struct disk_exception *result) +{ + struct disk_exception *e = get_exception(ps, index); + + /* copy it */ + result->old_chunk = le64_to_cpu(e->old_chunk); + result->new_chunk = le64_to_cpu(e->new_chunk); +} + +static void write_exception(struct pstore *ps, + uint32_t index, struct disk_exception *de) +{ + struct disk_exception *e = get_exception(ps, index); + + /* copy it */ + e->old_chunk = cpu_to_le64(de->old_chunk); + e->new_chunk = cpu_to_le64(de->new_chunk); +} + +/* + * Registers the exceptions that are present in the current area. + * 'full' is filled in to indicate if the area has been + * filled. + */ +static int insert_exceptions(struct pstore *ps, int *full) +{ + int r; + unsigned int i; + struct disk_exception de; + + /* presume the area is full */ + *full = 1; + + for (i = 0; i < ps->exceptions_per_area; i++) { + read_exception(ps, i, &de); + + /* + * If the new_chunk is pointing at the start of + * the COW device, where the first metadata area + * is we know that we've hit the end of the + * exceptions. Therefore the area is not full. + */ + if (de.new_chunk == 0LL) { + ps->current_committed = i; + *full = 0; + break; + } + + /* + * Keep track of the start of the free chunks. + */ + if (ps->next_free <= de.new_chunk) + ps->next_free = de.new_chunk + 1; + + /* + * Otherwise we add the exception to the snapshot. + */ + r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); + if (r) + return r; + } + + return 0; +} + +static int read_exceptions(struct pstore *ps) +{ + int r, full = 1; + + /* + * Keeping reading chunks and inserting exceptions until + * we find a partially full area. + */ + for (ps->current_area = 0; full; ps->current_area++) { + r = area_io(ps, READ); + if (r) + return r; + + r = insert_exceptions(ps, &full); + if (r) + return r; + } + + ps->current_area--; + + return 0; +} + +static struct pstore *get_info(struct dm_exception_store *store) +{ + return (struct pstore *) store->context; +} + +static void persistent_fraction_full(struct dm_exception_store *store, + sector_t *numerator, sector_t *denominator) +{ + *numerator = get_info(store)->next_free * store->snap->chunk_size; + *denominator = get_dev_size(store->snap->cow->bdev); +} + +static void persistent_destroy(struct dm_exception_store *store) +{ + struct pstore *ps = get_info(store); + + destroy_workqueue(ps->metadata_wq); + dm_io_client_destroy(ps->io_client); + vfree(ps->callbacks); + free_area(ps); + kfree(ps); +} + +static int persistent_read_metadata(struct dm_exception_store *store) +{ + int r, uninitialized_var(new_snapshot); + struct pstore *ps = get_info(store); + + /* + * Read the snapshot header. + */ + r = read_header(ps, &new_snapshot); + if (r) + return r; + + /* + * Now we know correct chunk_size, complete the initialisation. + */ + ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / + sizeof(struct disk_exception); + ps->callbacks = dm_vcalloc(ps->exceptions_per_area, + sizeof(*ps->callbacks)); + if (!ps->callbacks) + return -ENOMEM; + + /* + * Do we need to setup a new snapshot ? + */ + if (new_snapshot) { + r = write_header(ps); + if (r) { + DMWARN("write_header failed"); + return r; + } + + ps->current_area = 0; + zero_memory_area(ps); + r = zero_disk_area(ps, 0); + if (r) { + DMWARN("zero_disk_area(0) failed"); + return r; + } + } else { + /* + * Sanity checks. + */ + if (ps->version != SNAPSHOT_DISK_VERSION) { + DMWARN("unable to handle snapshot disk version %d", + ps->version); + return -EINVAL; + } + + /* + * Metadata are valid, but snapshot is invalidated + */ + if (!ps->valid) + return 1; + + /* + * Read the metadata. + */ + r = read_exceptions(ps); + if (r) + return r; + } + + return 0; +} + +static int persistent_prepare(struct dm_exception_store *store, + struct dm_snap_exception *e) +{ + struct pstore *ps = get_info(store); + uint32_t stride; + chunk_t next_free; + sector_t size = get_dev_size(store->snap->cow->bdev); + + /* Is there enough room ? */ + if (size < ((ps->next_free + 1) * store->snap->chunk_size)) + return -ENOSPC; + + e->new_chunk = ps->next_free; + + /* + * Move onto the next free pending, making sure to take + * into account the location of the metadata chunks. + */ + stride = (ps->exceptions_per_area + 1); + next_free = ++ps->next_free; + if (sector_div(next_free, stride) == 1) + ps->next_free++; + + atomic_inc(&ps->pending_count); + return 0; +} + +static void persistent_commit(struct dm_exception_store *store, + struct dm_snap_exception *e, + void (*callback) (void *, int success), + void *callback_context) +{ + unsigned int i; + struct pstore *ps = get_info(store); + struct disk_exception de; + struct commit_callback *cb; + + de.old_chunk = e->old_chunk; + de.new_chunk = e->new_chunk; + write_exception(ps, ps->current_committed++, &de); + + /* + * Add the callback to the back of the array. This code + * is the only place where the callback array is + * manipulated, and we know that it will never be called + * multiple times concurrently. + */ + cb = ps->callbacks + ps->callback_count++; + cb->callback = callback; + cb->context = callback_context; + + /* + * If there are exceptions in flight and we have not yet + * filled this metadata area there's nothing more to do. + */ + if (!atomic_dec_and_test(&ps->pending_count) && + (ps->current_committed != ps->exceptions_per_area)) + return; + + /* + * If we completely filled the current area, then wipe the next one. + */ + if ((ps->current_committed == ps->exceptions_per_area) && + zero_disk_area(ps, ps->current_area + 1)) + ps->valid = 0; + + /* + * Commit exceptions to disk. + */ + if (ps->valid && area_io(ps, WRITE)) + ps->valid = 0; + + /* + * Advance to the next area if this one is full. + */ + if (ps->current_committed == ps->exceptions_per_area) { + ps->current_committed = 0; + ps->current_area++; + zero_memory_area(ps); + } + + for (i = 0; i < ps->callback_count; i++) { + cb = ps->callbacks + i; + cb->callback(cb->context, ps->valid); + } + + ps->callback_count = 0; +} + +static void persistent_drop(struct dm_exception_store *store) +{ + struct pstore *ps = get_info(store); + + ps->valid = 0; + if (write_header(ps)) + DMWARN("write header failed"); +} + +int dm_create_persistent(struct dm_exception_store *store) +{ + struct pstore *ps; + + /* allocate the pstore */ + ps = kmalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + return -ENOMEM; + + ps->snap = store->snap; + ps->valid = 1; + ps->version = SNAPSHOT_DISK_VERSION; + ps->area = NULL; + ps->next_free = 2; /* skipping the header and first area */ + ps->current_committed = 0; + + ps->callback_count = 0; + atomic_set(&ps->pending_count, 0); + ps->callbacks = NULL; + + ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); + if (!ps->metadata_wq) { + kfree(ps); + DMERR("couldn't start header metadata update thread"); + return -ENOMEM; + } + + store->destroy = persistent_destroy; + store->read_metadata = persistent_read_metadata; + store->prepare_exception = persistent_prepare; + store->commit_exception = persistent_commit; + store->drop_snapshot = persistent_drop; + store->fraction_full = persistent_fraction_full; + store->context = ps; + + return 0; +} + +int dm_persistent_snapshot_init(void) +{ + return 0; +} + +void dm_persistent_snapshot_exit(void) +{ +} diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c new file mode 100644 index 00000000000..2a781df57fe --- /dev/null +++ b/drivers/md/dm-snap-transient.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. + * Copyright (C) 2006-2008 Red Hat GmbH + * + * This file is released under the GPL. + */ + +#include "dm-exception-store.h" +#include "dm-snap.h" + +#include +#include +#include +#include +#include + +#define DM_MSG_PREFIX "transient snapshot" + +/*----------------------------------------------------------------- + * Implementation of the store for non-persistent snapshots. + *---------------------------------------------------------------*/ +struct transient_c { + sector_t next_free; +}; + +static void transient_destroy(struct dm_exception_store *store) +{ + kfree(store->context); +} + +static int transient_read_metadata(struct dm_exception_store *store) +{ + return 0; +} + +static int transient_prepare(struct dm_exception_store *store, + struct dm_snap_exception *e) +{ + struct transient_c *tc = (struct transient_c *) store->context; + sector_t size = get_dev_size(store->snap->cow->bdev); + + if (size < (tc->next_free + store->snap->chunk_size)) + return -1; + + e->new_chunk = sector_to_chunk(store->snap, tc->next_free); + tc->next_free += store->snap->chunk_size; + + return 0; +} + +static void transient_commit(struct dm_exception_store *store, + struct dm_snap_exception *e, + void (*callback) (void *, int success), + void *callback_context) +{ + /* Just succeed */ + callback(callback_context, 1); +} + +static void transient_fraction_full(struct dm_exception_store *store, + sector_t *numerator, sector_t *denominator) +{ + *numerator = ((struct transient_c *) store->context)->next_free; + *denominator = get_dev_size(store->snap->cow->bdev); +} + +int dm_create_transient(struct dm_exception_store *store) +{ + struct transient_c *tc; + + store->destroy = transient_destroy; + store->read_metadata = transient_read_metadata; + store->prepare_exception = transient_prepare; + store->commit_exception = transient_commit; + store->drop_snapshot = NULL; + store->fraction_full = transient_fraction_full; + + tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); + if (!tc) + return -ENOMEM; + + tc->next_free = 0; + store->context = tc; + + return 0; +} + +int dm_transient_snapshot_init(void) +{ + return 0; +} + +void dm_transient_snapshot_exit(void) +{ +} diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 81f03a0e783..018b567fc75 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1406,6 +1406,12 @@ static int __init dm_snapshot_init(void) { int r; + r = dm_exception_store_init(); + if (r) { + DMERR("Failed to initialize exception stores"); + return r; + } + r = dm_register_target(&snapshot_target); if (r) { DMERR("snapshot target register failed %d", r); @@ -1454,17 +1460,17 @@ static int __init dm_snapshot_init(void) return 0; - bad_pending_pool: +bad_pending_pool: kmem_cache_destroy(tracked_chunk_cache); - bad5: +bad5: kmem_cache_destroy(pending_cache); - bad4: +bad4: kmem_cache_destroy(exception_cache); - bad3: +bad3: exit_origin_hash(); - bad2: +bad2: dm_unregister_target(&origin_target); - bad1: +bad1: dm_unregister_target(&snapshot_target); return r; } @@ -1480,6 +1486,8 @@ static void __exit dm_snapshot_exit(void) kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); kmem_cache_destroy(tracked_chunk_cache); + + dm_exception_store_exit(); } /* Module hooks */ -- cgit v1.2.3-70-g09d2 From a159c1ac5f33c6cf0f5aa3c9d1ccdc82c907ee46 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Tue, 6 Jan 2009 03:05:19 +0000 Subject: dm snapshot: extend exception store functions Supply dm_add_exception as a callback to the read_metadata function. Add a status function ready for a later patch and name the functions consistently. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 1 - drivers/md/dm-exception-store.h | 13 ++++++++++--- drivers/md/dm-snap-persistent.c | 42 +++++++++++++++++++++++++---------------- drivers/md/dm-snap-transient.c | 21 ++++++++++++--------- drivers/md/dm-snap.c | 9 +++++++-- drivers/md/dm-snap.h | 6 ------ 6 files changed, 55 insertions(+), 37 deletions(-) (limited to 'drivers/md/dm-snap.c') diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 74777e0f80d..dccbfb0e010 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -11,7 +11,6 @@ #include #include #include -#include #define DM_MSG_PREFIX "snapshot exception stores" diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 78d1acec77e..bb9f33d5daa 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -11,6 +11,7 @@ #define _LINUX_DM_EXCEPTION_STORE #include +#include /* * The snapshot code deals with largish chunks of the disk at a @@ -37,7 +38,6 @@ struct dm_snap_exception { * COW device). */ struct dm_exception_store { - /* * Destroys this object when you've finished with it. */ @@ -45,9 +45,13 @@ struct dm_exception_store { /* * The target shouldn't read the COW device until this is - * called. + * called. As exceptions are read from the COW, they are + * reported back via the callback. */ - int (*read_metadata) (struct dm_exception_store *store); + int (*read_metadata) (struct dm_exception_store *store, + int (*callback)(void *callback_context, + chunk_t old, chunk_t new), + void *callback_context); /* * Find somewhere to store the next exception. @@ -68,6 +72,9 @@ struct dm_exception_store { */ void (*drop_snapshot) (struct dm_exception_store *store); + int (*status) (struct dm_exception_store *store, status_type_t status, + char *result, unsigned int maxlen); + /* * Return how full the snapshot is. */ diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 57c946c69ee..936b34e0959 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -395,7 +395,11 @@ static void write_exception(struct pstore *ps, * 'full' is filled in to indicate if the area has been * filled. */ -static int insert_exceptions(struct pstore *ps, int *full) +static int insert_exceptions(struct pstore *ps, + int (*callback)(void *callback_context, + chunk_t old, chunk_t new), + void *callback_context, + int *full) { int r; unsigned int i; @@ -428,7 +432,7 @@ static int insert_exceptions(struct pstore *ps, int *full) /* * Otherwise we add the exception to the snapshot. */ - r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); + r = callback(callback_context, de.old_chunk, de.new_chunk); if (r) return r; } @@ -436,7 +440,10 @@ static int insert_exceptions(struct pstore *ps, int *full) return 0; } -static int read_exceptions(struct pstore *ps) +static int read_exceptions(struct pstore *ps, + int (*callback)(void *callback_context, chunk_t old, + chunk_t new), + void *callback_context) { int r, full = 1; @@ -449,7 +456,7 @@ static int read_exceptions(struct pstore *ps) if (r) return r; - r = insert_exceptions(ps, &full); + r = insert_exceptions(ps, callback, callback_context, &full); if (r) return r; } @@ -482,7 +489,10 @@ static void persistent_destroy(struct dm_exception_store *store) kfree(ps); } -static int persistent_read_metadata(struct dm_exception_store *store) +static int persistent_read_metadata(struct dm_exception_store *store, + int (*callback)(void *callback_context, + chunk_t old, chunk_t new), + void *callback_context) { int r, uninitialized_var(new_snapshot); struct pstore *ps = get_info(store); @@ -540,7 +550,7 @@ static int persistent_read_metadata(struct dm_exception_store *store) /* * Read the metadata. */ - r = read_exceptions(ps); + r = read_exceptions(ps, callback, callback_context); if (r) return r; } @@ -548,8 +558,8 @@ static int persistent_read_metadata(struct dm_exception_store *store) return 0; } -static int persistent_prepare(struct dm_exception_store *store, - struct dm_snap_exception *e) +static int persistent_prepare_exception(struct dm_exception_store *store, + struct dm_snap_exception *e) { struct pstore *ps = get_info(store); uint32_t stride; @@ -575,10 +585,10 @@ static int persistent_prepare(struct dm_exception_store *store, return 0; } -static void persistent_commit(struct dm_exception_store *store, - struct dm_snap_exception *e, - void (*callback) (void *, int success), - void *callback_context) +static void persistent_commit_exception(struct dm_exception_store *store, + struct dm_snap_exception *e, + void (*callback) (void *, int success), + void *callback_context) { unsigned int i; struct pstore *ps = get_info(store); @@ -637,7 +647,7 @@ static void persistent_commit(struct dm_exception_store *store, ps->callback_count = 0; } -static void persistent_drop(struct dm_exception_store *store) +static void persistent_drop_snapshot(struct dm_exception_store *store) { struct pstore *ps = get_info(store); @@ -675,9 +685,9 @@ int dm_create_persistent(struct dm_exception_store *store) store->destroy = persistent_destroy; store->read_metadata = persistent_read_metadata; - store->prepare_exception = persistent_prepare; - store->commit_exception = persistent_commit; - store->drop_snapshot = persistent_drop; + store->prepare_exception = persistent_prepare_exception; + store->commit_exception = persistent_commit_exception; + store->drop_snapshot = persistent_drop_snapshot; store->fraction_full = persistent_fraction_full; store->context = ps; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 2a781df57fe..7f6e2e6dcb0 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -28,13 +28,16 @@ static void transient_destroy(struct dm_exception_store *store) kfree(store->context); } -static int transient_read_metadata(struct dm_exception_store *store) +static int transient_read_metadata(struct dm_exception_store *store, + int (*callback)(void *callback_context, + chunk_t old, chunk_t new), + void *callback_context) { return 0; } -static int transient_prepare(struct dm_exception_store *store, - struct dm_snap_exception *e) +static int transient_prepare_exception(struct dm_exception_store *store, + struct dm_snap_exception *e) { struct transient_c *tc = (struct transient_c *) store->context; sector_t size = get_dev_size(store->snap->cow->bdev); @@ -48,10 +51,10 @@ static int transient_prepare(struct dm_exception_store *store, return 0; } -static void transient_commit(struct dm_exception_store *store, - struct dm_snap_exception *e, - void (*callback) (void *, int success), - void *callback_context) +static void transient_commit_exception(struct dm_exception_store *store, + struct dm_snap_exception *e, + void (*callback) (void *, int success), + void *callback_context) { /* Just succeed */ callback(callback_context, 1); @@ -70,8 +73,8 @@ int dm_create_transient(struct dm_exception_store *store) store->destroy = transient_destroy; store->read_metadata = transient_read_metadata; - store->prepare_exception = transient_prepare; - store->commit_exception = transient_commit; + store->prepare_exception = transient_prepare_exception; + store->commit_exception = transient_commit_exception; store->drop_snapshot = NULL; store->fraction_full = transient_fraction_full; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 018b567fc75..65ff82ff124 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -430,8 +430,13 @@ out: list_add(&new_e->hash_list, e ? &e->hash_list : l); } -int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) +/* + * Callback used by the exception stores to load exceptions when + * initialising. + */ +static int dm_add_exception(void *context, chunk_t old, chunk_t new) { + struct dm_snapshot *s = context; struct dm_snap_exception *e; e = alloc_exception(); @@ -660,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->tracked_chunk_lock); /* Metadata must only be loaded into one table at once */ - r = s->store.read_metadata(&s->store); + r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); if (r < 0) { ti->error = "Failed to read snapshot metadata"; goto bad_load_and_register; diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 92812365702..d9e62b43cf8 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -75,12 +75,6 @@ struct dm_snapshot { struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; }; -/* - * Used by the exception stores to load exceptions hen - * initialising. - */ -int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); - /* * Return the number of sectors in the device. */ -- cgit v1.2.3-70-g09d2