Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig            |  12
-rw-r--r--  drivers/md/Makefile           |   6
-rw-r--r--  drivers/md/bitmap.c           |   5
-rw-r--r--  drivers/md/dm-bio-list.h      |   5
-rw-r--r--  drivers/md/dm-crypt.c         | 222
-rw-r--r--  drivers/md/dm-delay.c         |  23
-rw-r--r--  drivers/md/dm-emc.c           |   8
-rw-r--r--  drivers/md/dm-hw-handler.c    |   6
-rw-r--r--  drivers/md/dm-hw-handler.h    |   1
-rw-r--r--  drivers/md/dm-ioctl.c         |  40
-rw-r--r--  drivers/md/dm-log.c           |   2
-rw-r--r--  drivers/md/dm-log.h           |   3
-rw-r--r--  drivers/md/dm-mpath-hp-sw.c   | 248
-rw-r--r--  drivers/md/dm-mpath-rdac.c    |  15
-rw-r--r--  drivers/md/dm-mpath.c         |  88
-rw-r--r--  drivers/md/dm-path-selector.c |   6
-rw-r--r--  drivers/md/dm-raid1.c         |  35
-rw-r--r--  drivers/md/dm-snap.c          |   3
-rw-r--r--  drivers/md/dm-stripe.c        |   3
-rw-r--r--  drivers/md/dm-table.c         |  41
-rw-r--r--  drivers/md/dm-target.c        |   6
-rw-r--r--  drivers/md/dm-uevent.c        | 222
-rw-r--r--  drivers/md/dm-uevent.h        |  59
-rw-r--r--  drivers/md/dm.c               |  93
-rw-r--r--  drivers/md/dm.h               |   1
-rw-r--r--  drivers/md/kcopyd.c           |   8
-rw-r--r--  drivers/md/linear.c           |  23
-rw-r--r--  drivers/md/md.c               |  58
-rw-r--r--  drivers/md/multipath.c        |  33
-rw-r--r--  drivers/md/raid0.c            |  34
-rw-r--r--  drivers/md/raid1.c            |  39
-rw-r--r--  drivers/md/raid10.c           |  34
-rw-r--r--  drivers/md/raid5.c            |  67
-rw-r--r--  drivers/md/raid6algos.c       |   4
-rw-r--r--  drivers/md/raid6mmx.c         |   2
-rw-r--r--  drivers/md/raid6sse1.c        |   2
-rw-r--r--  drivers/md/raid6sse2.c        |   4
-rw-r--r--  drivers/md/raid6x86.h         |   2
38 files changed, 992 insertions(+), 471 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 34a8c60a254..9b6fbf044fd 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -267,6 +267,12 @@ config DM_MULTIPATH_RDAC
---help---
Multipath support for LSI/Engenio RDAC.
+config DM_MULTIPATH_HP
+ tristate "HP MSA multipath support (EXPERIMENTAL)"
+ depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL
+ ---help---
+ Multipath support for HP MSA (Active/Passive) series hardware.
+
config DM_DELAY
tristate "I/O delaying target (EXPERIMENTAL)"
depends on BLK_DEV_DM && EXPERIMENTAL
@@ -276,4 +282,10 @@ config DM_DELAY
If unsure, say N.
+config DM_UEVENT
+ bool "DM uevents (EXPERIMENTAL)"
+ depends on BLK_DEV_DM && EXPERIMENTAL
+ ---help---
+ Generate udev events for DM events.
+
endif # MD
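
For reference, enabling the two new options amounts to a .config fragment along these lines; the values are only illustrative (DM_MULTIPATH_HP is tristate, DM_UEVENT is a plain bool, both sit behind EXPERIMENTAL, and dm-uevent.o is only linked into dm-mod when DM_UEVENT=y, per the Makefile change below):

    CONFIG_EXPERIMENTAL=y
    CONFIG_BLK_DEV_DM=y
    CONFIG_DM_MULTIPATH=m
    CONFIG_DM_MULTIPATH_HP=m
    CONFIG_DM_UEVENT=y
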
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index c49366cdc05..d9aa7edb878 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,7 @@ dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o
dm-mirror-objs := dm-log.o dm-raid1.o
dm-rdac-objs := dm-mpath-rdac.o
+dm-hp-sw-objs := dm-mpath-hp-sw.o
md-mod-objs := md.o bitmap.o
raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
raid6int1.o raid6int2.o raid6int4.o \
@@ -35,6 +36,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
obj-$(CONFIG_DM_DELAY) += dm-delay.o
obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o
+obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o
obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
@@ -48,6 +50,10 @@ ifeq ($(CONFIG_ALTIVEC),y)
altivec_flags := -maltivec -mabi=altivec
endif
+ifeq ($(CONFIG_DM_UEVENT),y)
+dm-mod-objs += dm-uevent.o
+endif
+
targets += raid6int1.c
$(obj)/raid6int1.c: UNROLL := 1
$(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 927cb34c480..1b1ef3130e6 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -274,7 +274,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
if (bitmap->offset < 0) {
/* DATA BITMAP METADATA */
if (bitmap->offset
- + page->index * (PAGE_SIZE/512)
+ + (long)(page->index * (PAGE_SIZE/512))
+ size/512 > 0)
/* bitmap runs in to metadata */
return -EINVAL;
@@ -1207,8 +1207,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
prepare_to_wait(&bitmap->overflow_wait, &__wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&bitmap->lock);
- bitmap->mddev->queue
- ->unplug_fn(bitmap->mddev->queue);
+ blk_unplug(bitmap->mddev->queue);
schedule();
finish_wait(&bitmap->overflow_wait, &__wait);
continue;
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index 3f7b827649e..d4509be0fe6 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -21,11 +21,6 @@ static inline int bio_list_empty(const struct bio_list *bl)
return bl->head == NULL;
}
-#define BIO_LIST_INIT { .head = NULL, .tail = NULL }
-
-#define BIO_LIST(bl) \
- struct bio_list bl = BIO_LIST_INIT
-
static inline void bio_list_init(struct bio_list *bl)
{
bl->head = bl->tail = NULL;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 8216a6f75be..28c6ae095c5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -36,7 +36,6 @@ struct dm_crypt_io {
struct work_struct work;
atomic_t pending;
int error;
- int post_process;
};
/*
@@ -57,7 +56,7 @@ struct crypt_config;
struct crypt_iv_operations {
int (*ctr)(struct crypt_config *cc, struct dm_target *ti,
- const char *opts);
+ const char *opts);
void (*dtr)(struct crypt_config *cc);
const char *(*status)(struct crypt_config *cc);
int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
@@ -80,6 +79,8 @@ struct crypt_config {
mempool_t *page_pool;
struct bio_set *bs;
+ struct workqueue_struct *io_queue;
+ struct workqueue_struct *crypt_queue;
/*
* crypto related data
*/
@@ -112,7 +113,7 @@ static void clone_init(struct dm_crypt_io *, struct bio *);
* Different IV generation algorithms:
*
* plain: the initial vector is the 32-bit little-endian version of the sector
- * number, padded with zeros if neccessary.
+ * number, padded with zeros if necessary.
*
* essiv: "encrypted sector|salt initial vector", the sector number is
* encrypted with the bulk cipher using a salt as key. The salt
@@ -137,7 +138,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
}
static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
- const char *opts)
+ const char *opts)
{
struct crypto_cipher *essiv_tfm;
struct crypto_hash *hash_tfm;
@@ -167,7 +168,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
return -ENOMEM;
}
- sg_set_buf(&sg, cc->key, cc->key_size);
+ sg_init_one(&sg, cc->key, cc->key_size);
desc.tfm = hash_tfm;
desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
err = crypto_hash_digest(&desc, &sg, cc->key_size, salt);
@@ -175,6 +176,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
if (err) {
ti->error = "Error calculating hash in ESSIV";
+ kfree(salt);
return err;
}
@@ -188,7 +190,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
if (crypto_cipher_blocksize(essiv_tfm) !=
crypto_blkcipher_ivsize(cc->tfm)) {
ti->error = "Block size of ESSIV cipher does "
- "not match IV size of block cipher";
+ "not match IV size of block cipher";
crypto_free_cipher(essiv_tfm);
kfree(salt);
return -EINVAL;
@@ -319,10 +321,10 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
return r;
}
-static void
-crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
- struct bio *bio_out, struct bio *bio_in,
- sector_t sector, int write)
+static void crypt_convert_init(struct crypt_config *cc,
+ struct convert_context *ctx,
+ struct bio *bio_out, struct bio *bio_in,
+ sector_t sector, int write)
{
ctx->bio_in = bio_in;
ctx->bio_out = bio_out;
@@ -338,7 +340,7 @@ crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
static int crypt_convert(struct crypt_config *cc,
- struct convert_context *ctx)
+ struct convert_context *ctx)
{
int r = 0;
@@ -346,16 +348,13 @@ static int crypt_convert(struct crypt_config *cc,
ctx->idx_out < ctx->bio_out->bi_vcnt) {
struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
- struct scatterlist sg_in = {
- .page = bv_in->bv_page,
- .offset = bv_in->bv_offset + ctx->offset_in,
- .length = 1 << SECTOR_SHIFT
- };
- struct scatterlist sg_out = {
- .page = bv_out->bv_page,
- .offset = bv_out->bv_offset + ctx->offset_out,
- .length = 1 << SECTOR_SHIFT
- };
+ struct scatterlist sg_in, sg_out;
+
+ sg_init_table(&sg_in, 1);
+ sg_set_page(&sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, bv_in->bv_offset + ctx->offset_in);
+
+ sg_init_table(&sg_out, 1);
+ sg_set_page(&sg_out, bv_out->bv_page, 1 << SECTOR_SHIFT, bv_out->bv_offset + ctx->offset_out);
ctx->offset_in += sg_in.length;
if (ctx->offset_in >= bv_in->bv_len) {
@@ -370,7 +369,7 @@ static int crypt_convert(struct crypt_config *cc,
}
r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
- ctx->write, ctx->sector);
+ ctx->write, ctx->sector);
if (r < 0)
break;
@@ -380,13 +379,13 @@ static int crypt_convert(struct crypt_config *cc,
return r;
}
- static void dm_crypt_bio_destructor(struct bio *bio)
- {
+static void dm_crypt_bio_destructor(struct bio *bio)
+{
struct dm_crypt_io *io = bio->bi_private;
struct crypt_config *cc = io->target->private;
bio_free(bio, cc->bs);
- }
+}
/*
* Generate a new unfragmented bio with the given size
@@ -441,33 +440,12 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
return clone;
}
-static void crypt_free_buffer_pages(struct crypt_config *cc,
- struct bio *clone, unsigned int bytes)
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
{
- unsigned int i, start, end;
+ unsigned int i;
struct bio_vec *bv;
- /*
- * This is ugly, but Jens Axboe thinks that using bi_idx in the
- * endio function is too dangerous at the moment, so I calculate the
- * correct position using bi_vcnt and bi_size.
- * The bv_offset and bv_len fields might already be modified but we
- * know that we always allocated whole pages.
- * A fix to the bi_idx issue in the kernel is in the works, so
- * we will hopefully be able to revert to the cleaner solution soon.
- */
- i = clone->bi_vcnt - 1;
- bv = bio_iovec_idx(clone, i);
- end = (i << PAGE_SHIFT) + (bv->bv_offset + bv->bv_len) - clone->bi_size;
- start = end - bytes;
-
- start >>= PAGE_SHIFT;
- if (!clone->bi_size)
- end = clone->bi_vcnt;
- else
- end >>= PAGE_SHIFT;
-
- for (i = start; i < end; i++) {
+ for (i = 0; i < clone->bi_vcnt; i++) {
bv = bio_iovec_idx(clone, i);
BUG_ON(!bv->bv_page);
mempool_free(bv->bv_page, cc->page_pool);
@@ -479,7 +457,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc,
* One of the bios was finished. Check for completion of
* the whole request and correctly clean up the buffer.
*/
-static void dec_pending(struct dm_crypt_io *io, int error)
+static void crypt_dec_pending(struct dm_crypt_io *io, int error)
{
struct crypt_config *cc = (struct crypt_config *) io->target->private;
@@ -495,18 +473,36 @@ static void dec_pending(struct dm_crypt_io *io, int error)
}
/*
- * kcryptd:
+ * kcryptd/kcryptd_io:
*
* Needed because it would be very unwise to do decryption in an
* interrupt context.
+ *
+ * kcryptd performs the actual encryption or decryption.
+ *
+ * kcryptd_io performs the IO submission.
+ *
+ * They must be separated as otherwise the final stages could be
+ * starved by new requests which can block in the first stages due
+ * to memory allocation.
*/
-static struct workqueue_struct *_kcryptd_workqueue;
static void kcryptd_do_work(struct work_struct *work);
+static void kcryptd_do_crypt(struct work_struct *work);
static void kcryptd_queue_io(struct dm_crypt_io *io)
{
+ struct crypt_config *cc = io->target->private;
+
INIT_WORK(&io->work, kcryptd_do_work);
- queue_work(_kcryptd_workqueue, &io->work);
+ queue_work(cc->io_queue, &io->work);
+}
+
+static void kcryptd_queue_crypt(struct dm_crypt_io *io)
+{
+ struct crypt_config *cc = io->target->private;
+
+ INIT_WORK(&io->work, kcryptd_do_crypt);
+ queue_work(cc->crypt_queue, &io->work);
}
static void crypt_endio(struct bio *clone, int error)
@@ -519,7 +515,7 @@ static void crypt_endio(struct bio *clone, int error)
* free the processed pages
*/
if (!read_io) {
- crypt_free_buffer_pages(cc, clone, clone->bi_size);
+ crypt_free_buffer_pages(cc, clone);
goto out;
}
@@ -529,13 +525,12 @@ static void crypt_endio(struct bio *clone, int error)
}
bio_put(clone);
- io->post_process = 1;
- kcryptd_queue_io(io);
+ kcryptd_queue_crypt(io);
return;
out:
bio_put(clone);
- dec_pending(io, error);
+ crypt_dec_pending(io, error);
}
static void clone_init(struct dm_crypt_io *io, struct bio *clone)
@@ -565,7 +560,7 @@ static void process_read(struct dm_crypt_io *io)
*/
clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs);
if (unlikely(!clone)) {
- dec_pending(io, -ENOMEM);
+ crypt_dec_pending(io, -ENOMEM);
return;
}
@@ -600,7 +595,7 @@ static void process_write(struct dm_crypt_io *io)
while (remaining) {
clone = crypt_alloc_buffer(io, remaining);
if (unlikely(!clone)) {
- dec_pending(io, -ENOMEM);
+ crypt_dec_pending(io, -ENOMEM);
return;
}
@@ -608,9 +603,9 @@ static void process_write(struct dm_crypt_io *io)
ctx.idx_out = 0;
if (unlikely(crypt_convert(cc, &ctx) < 0)) {
- crypt_free_buffer_pages(cc, clone, clone->bi_size);
+ crypt_free_buffer_pages(cc, clone);
bio_put(clone);
- dec_pending(io, -EIO);
+ crypt_dec_pending(io, -EIO);
return;
}
@@ -645,17 +640,23 @@ static void process_read_endio(struct dm_crypt_io *io)
crypt_convert_init(cc, &ctx, io->base_bio, io->base_bio,
io->base_bio->bi_sector - io->target->begin, 0);
- dec_pending(io, crypt_convert(cc, &ctx));
+ crypt_dec_pending(io, crypt_convert(cc, &ctx));
}
static void kcryptd_do_work(struct work_struct *work)
{
struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
- if (io->post_process)
- process_read_endio(io);
- else if (bio_data_dir(io->base_bio) == READ)
+ if (bio_data_dir(io->base_bio) == READ)
process_read(io);
+}
+
+static void kcryptd_do_crypt(struct work_struct *work)
+{
+ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+ if (bio_data_dir(io->base_bio) == READ)
+ process_read_endio(io);
else
process_write(io);
}
@@ -711,7 +712,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key)
cc->key_size = key_size; /* initial settings */
if ((!key_size && strcmp(key, "-")) ||
- (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
+ (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
return -EINVAL;
set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
@@ -767,7 +768,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (crypt_set_key(cc, argv[1])) {
ti->error = "Error decoding key";
- goto bad1;
+ goto bad_cipher;
}
/* Compatiblity mode for old dm-crypt cipher strings */
@@ -778,19 +779,19 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (strcmp(chainmode, "ecb") && !ivmode) {
ti->error = "This chaining mode requires an IV mechanism";
- goto bad1;
+ goto bad_cipher;
}
- if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode,
- cipher) >= CRYPTO_MAX_ALG_NAME) {
+ if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+ chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) {
ti->error = "Chain mode + cipher name is too long";
- goto bad1;
+ goto bad_cipher;
}
tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm)) {
ti->error = "Error allocating crypto tfm";
- goto bad1;
+ goto bad_cipher;
}
strcpy(cc->cipher, cipher);
@@ -814,18 +815,18 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->iv_gen_ops = &crypt_iv_null_ops;
else {
ti->error = "Invalid IV mode";
- goto bad2;
+ goto bad_ivmode;
}
if (cc->iv_gen_ops && cc->iv_gen_ops->ctr &&
cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
- goto bad2;
+ goto bad_ivmode;
cc->iv_size = crypto_blkcipher_ivsize(tfm);
if (cc->iv_size)
/* at least a 64 bit sector number should fit in our buffer */
cc->iv_size = max(cc->iv_size,
- (unsigned int)(sizeof(u64) / sizeof(u8)));
+ (unsigned int)(sizeof(u64) / sizeof(u8)));
else {
if (cc->iv_gen_ops) {
DMWARN("Selected cipher does not support IVs");
@@ -838,13 +839,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
if (!cc->io_pool) {
ti->error = "Cannot allocate crypt io mempool";
- goto bad3;
+ goto bad_slab_pool;
}
cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
- goto bad4;
+ goto bad_page_pool;
}
cc->bs = bioset_create(MIN_IOS, MIN_IOS);
@@ -855,25 +856,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) {
ti->error = "Error setting key";
- goto bad5;
+ goto bad_device;
}
if (sscanf(argv[2], "%llu", &tmpll) != 1) {
ti->error = "Invalid iv_offset sector";
- goto bad5;
+ goto bad_device;
}
cc->iv_offset = tmpll;
if (sscanf(argv[4], "%llu", &tmpll) != 1) {
ti->error = "Invalid device sector";
- goto bad5;
+ goto bad_device;
}
cc->start = tmpll;
if (dm_get_device(ti, argv[3], cc->start, ti->len,
- dm_table_get_mode(ti->table), &cc->dev)) {
+ dm_table_get_mode(ti->table), &cc->dev)) {
ti->error = "Device lookup failed";
- goto bad5;
+ goto bad_device;
}
if (ivmode && cc->iv_gen_ops) {
@@ -882,27 +883,45 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
if (!cc->iv_mode) {
ti->error = "Error kmallocing iv_mode string";
- goto bad5;
+ goto bad_ivmode_string;
}
strcpy(cc->iv_mode, ivmode);
} else
cc->iv_mode = NULL;
+ cc->io_queue = create_singlethread_workqueue("kcryptd_io");
+ if (!cc->io_queue) {
+ ti->error = "Couldn't create kcryptd io queue";
+ goto bad_io_queue;
+ }
+
+ cc->crypt_queue = create_singlethread_workqueue("kcryptd");
+ if (!cc->crypt_queue) {
+ ti->error = "Couldn't create kcryptd queue";
+ goto bad_crypt_queue;
+ }
+
ti->private = cc;
return 0;
-bad5:
+bad_crypt_queue:
+ destroy_workqueue(cc->io_queue);
+bad_io_queue:
+ kfree(cc->iv_mode);
+bad_ivmode_string:
+ dm_put_device(ti, cc->dev);
+bad_device:
bioset_free(cc->bs);
bad_bs:
mempool_destroy(cc->page_pool);
-bad4:
+bad_page_pool:
mempool_destroy(cc->io_pool);
-bad3:
+bad_slab_pool:
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
-bad2:
+bad_ivmode:
crypto_free_blkcipher(tfm);
-bad1:
+bad_cipher:
/* Must zero key material before freeing */
memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
kfree(cc);
@@ -913,7 +932,8 @@ static void crypt_dtr(struct dm_target *ti)
{
struct crypt_config *cc = (struct crypt_config *) ti->private;
- flush_workqueue(_kcryptd_workqueue);
+ destroy_workqueue(cc->io_queue);
+ destroy_workqueue(cc->crypt_queue);
bioset_free(cc->bs);
mempool_destroy(cc->page_pool);
@@ -939,9 +959,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->target = ti;
io->base_bio = bio;
- io->error = io->post_process = 0;
+ io->error = 0;
atomic_set(&io->pending, 0);
- kcryptd_queue_io(io);
+
+ if (bio_data_dir(io->base_bio) == READ)
+ kcryptd_queue_io(io);
+ else
+ kcryptd_queue_crypt(io);
return DM_MAPIO_SUBMITTED;
}
@@ -1058,25 +1082,12 @@ static int __init dm_crypt_init(void)
if (!_crypt_io_pool)
return -ENOMEM;
- _kcryptd_workqueue = create_workqueue("kcryptd");
- if (!_kcryptd_workqueue) {
- r = -ENOMEM;
- DMERR("couldn't create kcryptd");
- goto bad1;
- }
-
r = dm_register_target(&crypt_target);
if (r < 0) {
DMERR("register failed %d", r);
- goto bad2;
+ kmem_cache_destroy(_crypt_io_pool);
}
- return 0;
-
-bad2:
- destroy_workqueue(_kcryptd_workqueue);
-bad1:
- kmem_cache_destroy(_crypt_io_pool);
return r;
}
@@ -1087,7 +1098,6 @@ static void __exit dm_crypt_exit(void)
if (r < 0)
DMERR("unregister failed %d", r);
- destroy_workqueue(_kcryptd_workqueue);
kmem_cache_destroy(_crypt_io_pool);
}
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 6928c136d3c..bdd37f881c4 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -83,7 +83,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
struct dm_delay_info *delayed, *next;
unsigned long next_expires = 0;
int start_timer = 0;
- BIO_LIST(flush_bios);
+ struct bio_list flush_bios = { };
mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
@@ -163,34 +163,32 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- if (argc == 3) {
- dc->dev_write = NULL;
+ dc->dev_write = NULL;
+ if (argc == 3)
goto out;
- }
if (sscanf(argv[4], "%llu", &tmpll) != 1) {
ti->error = "Invalid write device sector";
- goto bad;
+ goto bad_dev_read;
}
dc->start_write = tmpll;
if (sscanf(argv[5], "%u", &dc->write_delay) != 1) {
ti->error = "Invalid write delay";
- goto bad;
+ goto bad_dev_read;
}
if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
dm_table_get_mode(ti->table), &dc->dev_write)) {
ti->error = "Write device lookup failed";
- dm_put_device(ti, dc->dev_read);
- goto bad;
+ goto bad_dev_read;
}
out:
dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache);
if (!dc->delayed_pool) {
DMERR("Couldn't create delayed bio pool.");
- goto bad;
+ goto bad_dev_write;
}
setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
@@ -203,6 +201,11 @@ out:
ti->private = dc;
return 0;
+bad_dev_write:
+ if (dc->dev_write)
+ dm_put_device(ti, dc->dev_write);
+bad_dev_read:
+ dm_put_device(ti, dc->dev_read);
bad:
kfree(dc);
return -EINVAL;
@@ -305,7 +308,7 @@ static int delay_status(struct dm_target *ti, status_type_t type,
(unsigned long long) dc->start_read,
dc->read_delay);
if (dc->dev_write)
- DMEMIT("%s %llu %u", dc->dev_write->name,
+ DMEMIT(" %s %llu %u", dc->dev_write->name,
(unsigned long long) dc->start_write,
dc->write_delay);
break;
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 342517261ec..6b91b9ab1d4 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -81,7 +81,7 @@ static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size)
}
if (bio_add_page(bio, page, data_size, 0) != data_size) {
- DMERR("get_failover_bio: alloc_page() failed.");
+ DMERR("get_failover_bio: bio_add_page() failed.");
__free_page(page);
bio_put(bio);
return NULL;
@@ -211,12 +211,10 @@ fail_path:
static struct emc_handler *alloc_emc_handler(void)
{
- struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL);
+ struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL);
- if (h) {
- memset(h, 0, sizeof(*h));
+ if (h)
spin_lock_init(&h->lock);
- }
return h;
}
diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c
index baafaaba4d4..2ee84d8aa0b 100644
--- a/drivers/md/dm-hw-handler.c
+++ b/drivers/md/dm-hw-handler.c
@@ -91,12 +91,10 @@ void dm_put_hw_handler(struct hw_handler_type *hwht)
static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht)
{
- struct hwh_internal *hwhi = kmalloc(sizeof(*hwhi), GFP_KERNEL);
+ struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL);
- if (hwhi) {
- memset(hwhi, 0, sizeof(*hwhi));
+ if (hwhi)
hwhi->hwht = *hwht;
- }
return hwhi;
}
diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h
index e0832e6fcf3..46809dcb121 100644
--- a/drivers/md/dm-hw-handler.h
+++ b/drivers/md/dm-hw-handler.h
@@ -58,5 +58,6 @@ unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio);
#define MP_FAIL_PATH 1
#define MP_BYPASS_PG 2
#define MP_ERROR_IO 4 /* Don't retry this I/O */
+#define MP_RETRY 8
#endif
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index b441d82c338..138200bf5e0 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -700,7 +700,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
int r;
char *new_name = (char *) param + param->data_start;
- if (new_name < (char *) (param + 1) ||
+ if (new_name < (char *) param->data ||
invalid_str(new_name, (void *) param + param_size)) {
DMWARN("Invalid new logical volume name supplied.");
return -EINVAL;
@@ -726,7 +726,7 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
if (!md)
return -ENXIO;
- if (geostr < (char *) (param + 1) ||
+ if (geostr < (char *) param->data ||
invalid_str(geostr, (void *) param + param_size)) {
DMWARN("Invalid geometry supplied.");
goto out;
@@ -1233,7 +1233,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
if (r)
goto out;
- if (tmsg < (struct dm_target_msg *) (param + 1) ||
+ if (tmsg < (struct dm_target_msg *) param->data ||
invalid_str(tmsg->message, (void *) param + param_size)) {
DMWARN("Invalid target message parameters.");
r = -EINVAL;
@@ -1358,7 +1358,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
if (tmp.data_size < sizeof(tmp))
return -EINVAL;
- dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+ dmi = vmalloc(tmp.data_size);
if (!dmi)
return -ENOMEM;
@@ -1515,3 +1515,35 @@ void dm_interface_exit(void)
dm_hash_exit();
}
+
+/**
+ * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers
+ * @md: Pointer to mapped_device
+ * @name: Buffer (size DM_NAME_LEN) for name
+ * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined
+ */
+int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
+{
+ int r = 0;
+ struct hash_cell *hc;
+
+ if (!md)
+ return -ENXIO;
+
+ dm_get(md);
+ down_read(&_hash_lock);
+ hc = dm_get_mdptr(md);
+ if (!hc || hc->md != md) {
+ r = -ENXIO;
+ goto out;
+ }
+
+ strcpy(name, hc->name);
+ strcpy(uuid, hc->uuid ? : "");
+
+out:
+ up_read(&_hash_lock);
+ dm_put(md);
+
+ return r;
+}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a66428d860f..072ee4353ea 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -696,7 +696,7 @@ static struct dirty_log_type _disk_type = {
.module = THIS_MODULE,
.ctr = disk_ctr,
.dtr = disk_dtr,
- .suspend = disk_flush,
+ .postsuspend = disk_flush,
.resume = disk_resume,
.get_region_size = core_get_region_size,
.is_clean = core_is_clean,
diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h
index 86a301c8daf..3fae87eb596 100644
--- a/drivers/md/dm-log.h
+++ b/drivers/md/dm-log.h
@@ -32,7 +32,8 @@ struct dirty_log_type {
* There are times when we don't want the log to touch
* the disk.
*/
- int (*suspend)(struct dirty_log *log);
+ int (*presuspend)(struct dirty_log *log);
+ int (*postsuspend)(struct dirty_log *log);
int (*resume)(struct dirty_log *log);
/*
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
new file mode 100644
index 00000000000..204bf42c944
--- /dev/null
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2005 Mike Christie, All rights reserved.
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ * Authors: Mike Christie
+ * Dave Wysochanski
+ *
+ * This file is released under the GPL.
+ *
+ * This module implements the specific path activation code for
+ * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive)
+ * storage arrays.
+ * These storage arrays have controller-based failover, not
+ * LUN-based failover. However, LUN-based failover is the design
+ * of dm-multipath. Thus, this module is written for LUN-based failover.
+ */
+#include <linux/blkdev.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+
+#include "dm.h"
+#include "dm-hw-handler.h"
+
+#define DM_MSG_PREFIX "multipath hp-sw"
+#define DM_HP_HWH_NAME "hp-sw"
+#define DM_HP_HWH_VER "1.0.0"
+
+struct hp_sw_context {
+ unsigned char sense[SCSI_SENSE_BUFFERSIZE];
+};
+
+/*
+ * hp_sw_error_is_retryable - Is an HP-specific check condition retryable?
+ * @req: path activation request
+ *
+ * Examine error codes of request and determine whether the error is retryable.
+ * Some error codes are already retried by scsi-ml (see
+ * scsi_decide_disposition), but some HP specific codes are not.
+ * The intent of this routine is to supply the logic for the HP specific
+ * check conditions.
+ *
+ * Returns:
+ * 1 - command completed with retryable error
+ * 0 - command completed with non-retryable error
+ *
+ * Possible optimizations
+ * 1. More hardware-specific error codes
+ */
+static int hp_sw_error_is_retryable(struct request *req)
+{
+ /*
+ * NOT_READY is known to be retryable
+ * For now we just dump out the sense data and call it retryable
+ */
+ if (status_byte(req->errors) == CHECK_CONDITION)
+ __scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len);
+
+ /*
+ * At this point we don't have complete information about all the error
+ * codes from this hardware, so we are just conservative and retry
+ * when in doubt.
+ */
+ return 1;
+}
+
+/*
+ * hp_sw_end_io - Completion handler for HP path activation.
+ * @req: path activation request
+ * @error: scsi-ml error
+ *
+ * Check sense data, free request structure, and notify dm that
+ * pg initialization has completed.
+ *
+ * Context: scsi-ml softirq
+ *
+ */
+static void hp_sw_end_io(struct request *req, int error)
+{
+ struct dm_path *path = req->end_io_data;
+ unsigned err_flags = 0;
+
+ if (!error) {
+ DMDEBUG("%s path activation command - success",
+ path->dev->name);
+ goto out;
+ }
+
+ if (hp_sw_error_is_retryable(req)) {
+ DMDEBUG("%s path activation command - retry",
+ path->dev->name);
+ err_flags = MP_RETRY;
+ goto out;
+ }
+
+ DMWARN("%s path activation fail - error=0x%x",
+ path->dev->name, error);
+ err_flags = MP_FAIL_PATH;
+
+out:
+ req->end_io_data = NULL;
+ __blk_put_request(req->q, req);
+ dm_pg_init_complete(path, err_flags);
+}
+
+/*
+ * hp_sw_get_request - Allocate an HP specific path activation request
+ * @path: path on which request will be sent (needed for request queue)
+ *
+ * The START command is used for path activation request.
+ * These arrays are controller-based failover, not LUN based.
+ * One START command issued to a single path will fail over all
+ * LUNs for the same controller.
+ *
+ * Possible optimizations
+ * 1. Make timeout configurable
+ * 2. Preallocate request
+ */
+static struct request *hp_sw_get_request(struct dm_path *path)
+{
+ struct request *req;
+ struct block_device *bdev = path->dev->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct hp_sw_context *h = path->hwhcontext;
+
+ req = blk_get_request(q, WRITE, GFP_NOIO);
+ if (!req)
+ goto out;
+
+ req->timeout = 60 * HZ;
+
+ req->errors = 0;
+ req->cmd_type = REQ_TYPE_BLOCK_PC;
+ req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
+ req->end_io_data = path;
+ req->sense = h->sense;
+ memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
+
+ memset(&req->cmd, 0, BLK_MAX_CDB);
+ req->cmd[0] = START_STOP;
+ req->cmd[4] = 1;
+ req->cmd_len = COMMAND_SIZE(req->cmd[0]);
+
+out:
+ return req;
+}
+
+/*
+ * hp_sw_pg_init - HP path activation implementation.
+ * @hwh: hardware handler specific data
+ * @bypassed: unused; is the path group bypassed? (see dm-mpath.c)
+ * @path: path to send initialization command
+ *
+ * Send an HP-specific path activation command on 'path'.
+ * Do not try to optimize in any way, just send the activation command.
+ * More than one path activation command may be sent to the same controller.
+ * This seems to work fine for basic failover support.
+ *
+ * Possible optimizations
+ * 1. Detect an in-progress activation request and avoid submitting another one
+ * 2. Model the controller and only send a single activation request at a time
+ * 3. Determine the state of a path before sending an activation request
+ *
+ * Context: kmpathd (see process_queued_ios() in dm-mpath.c)
+ */
+static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
+ struct dm_path *path)
+{
+ struct request *req;
+ struct hp_sw_context *h;
+
+ path->hwhcontext = hwh->context;
+ h = hwh->context;
+
+ req = hp_sw_get_request(path);
+ if (!req) {
+ DMERR("%s path activation command - allocation fail",
+ path->dev->name);
+ goto retry;
+ }
+
+ DMDEBUG("%s path activation command - sent", path->dev->name);
+
+ blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io);
+ return;
+
+retry:
+ dm_pg_init_complete(path, MP_RETRY);
+}
+
+static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv)
+{
+ struct hp_sw_context *h;
+
+ h = kmalloc(sizeof(*h), GFP_KERNEL);
+ if (!h)
+ return -ENOMEM;
+
+ hwh->context = h;
+
+ return 0;
+}
+
+static void hp_sw_destroy(struct hw_handler *hwh)
+{
+ struct hp_sw_context *h = hwh->context;
+
+ kfree(h);
+}
+
+static struct hw_handler_type hp_sw_hwh = {
+ .name = DM_HP_HWH_NAME,
+ .module = THIS_MODULE,
+ .create = hp_sw_create,
+ .destroy = hp_sw_destroy,
+ .pg_init = hp_sw_pg_init,
+};
+
+static int __init hp_sw_init(void)
+{
+ int r;
+
+ r = dm_register_hw_handler(&hp_sw_hwh);
+ if (r < 0)
+ DMERR("register failed %d", r);
+ else
+ DMINFO("version " DM_HP_HWH_VER " loaded");
+
+ return r;
+}
+
+static void __exit hp_sw_exit(void)
+{
+ int r;
+
+ r = dm_unregister_hw_handler(&hp_sw_hwh);
+ if (r < 0)
+ DMERR("unregister failed %d", r);
+}
+
+module_init(hp_sw_init);
+module_exit(hp_sw_exit);
+
+MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support");
+MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DM_HP_HWH_VER);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index 16b16134577..e04eb5c697f 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -664,20 +664,21 @@ static struct hw_handler_type rdac_handler = {
static int __init rdac_init(void)
{
- int r = dm_register_hw_handler(&rdac_handler);
-
- if (r < 0) {
- DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
- return r;
- }
+ int r;
rdac_wkqd = create_singlethread_workqueue("rdac_wkqd");
if (!rdac_wkqd) {
DMERR("Failed to create workqueue rdac_wkqd.");
- dm_unregister_hw_handler(&rdac_handler);
return -ENOMEM;
}
+ r = dm_register_hw_handler(&rdac_handler);
+ if (r < 0) {
+ DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
+ destroy_workqueue(rdac_wkqd);
+ return r;
+ }
+
DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER);
return 0;
}
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 31056abca89..24b2b1e32fa 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -10,6 +10,7 @@
#include "dm-hw-handler.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"
+#include "dm-uevent.h"
#include <linux/ctype.h>
#include <linux/init.h>
@@ -75,6 +76,8 @@ struct multipath {
unsigned queue_io; /* Must we queue all I/O? */
unsigned queue_if_no_path; /* Queue I/O if last path fails? */
unsigned saved_queue_if_no_path;/* Saved state during suspension */
+ unsigned pg_init_retries; /* Number of times to retry pg_init */
+ unsigned pg_init_count; /* Number of times pg_init called */
struct work_struct process_queued_ios;
struct bio_list queued_ios;
@@ -225,6 +228,8 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
m->pg_init_required = 0;
m->queue_io = 0;
}
+
+ m->pg_init_count = 0;
}
static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
@@ -424,6 +429,7 @@ static void process_queued_ios(struct work_struct *work)
must_queue = 0;
if (m->pg_init_required && !m->pg_init_in_progress) {
+ m->pg_init_count++;
m->pg_init_required = 0;
m->pg_init_in_progress = 1;
init_required = 1;
@@ -689,9 +695,11 @@ static int parse_features(struct arg_set *as, struct multipath *m)
int r;
unsigned argc;
struct dm_target *ti = m->ti;
+ const char *param_name;
static struct param _params[] = {
- {0, 1, "invalid number of feature args"},
+ {0, 3, "invalid number of feature args"},
+ {1, 50, "pg_init_retries must be between 1 and 50"},
};
r = read_param(_params, shift(as), &argc, &ti->error);
@@ -701,12 +709,28 @@ static int parse_features(struct arg_set *as, struct multipath *m)
if (!argc)
return 0;
- if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
- return queue_if_no_path(m, 1, 0);
- else {
+ do {
+ param_name = shift(as);
+ argc--;
+
+ if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+ r = queue_if_no_path(m, 1, 0);
+ continue;
+ }
+
+ if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+ (argc >= 1)) {
+ r = read_param(_params + 1, shift(as),
+ &m->pg_init_retries, &ti->error);
+ argc--;
+ continue;
+ }
+
ti->error = "Unrecognised multipath feature request";
- return -EINVAL;
- }
+ r = -EINVAL;
+ } while (argc && !r);
+
+ return r;
}
static int multipath_ctr(struct dm_target *ti, unsigned int argc,
@@ -834,6 +858,9 @@ static int fail_path(struct pgpath *pgpath)
if (pgpath == m->current_pgpath)
m->current_pgpath = NULL;
+ dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
+ pgpath->path.dev->name, m->nr_valid_paths);
+
queue_work(kmultipathd, &m->trigger_event);
out:
@@ -873,6 +900,9 @@ static int reinstate_path(struct pgpath *pgpath)
if (!m->nr_valid_paths++ && m->queue_size)
queue_work(kmultipathd, &m->process_queued_ios);
+ dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
+ pgpath->path.dev->name, m->nr_valid_paths);
+
queue_work(kmultipathd, &m->trigger_event);
out:
@@ -976,6 +1006,26 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
}
/*
+ * Should we retry pg_init immediately?
+ */
+static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
+{
+ unsigned long flags;
+ int limit_reached = 0;
+
+ spin_lock_irqsave(&m->lock, flags);
+
+ if (m->pg_init_count <= m->pg_init_retries)
+ m->pg_init_required = 1;
+ else
+ limit_reached = 1;
+
+ spin_unlock_irqrestore(&m->lock, flags);
+
+ return limit_reached;
+}
+
+/*
* pg_init must call this when it has completed its initialisation
*/
void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
@@ -985,8 +1035,14 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
struct multipath *m = pg->m;
unsigned long flags;
- /* We insist on failing the path if the PG is already bypassed. */
- if (err_flags && pg->bypassed)
+ /*
+ * If requested, retry pg_init until maximum number of retries exceeded.
+ * If retry not requested and PG already bypassed, always fail the path.
+ */
+ if (err_flags & MP_RETRY) {
+ if (pg_init_limit_reached(m, pgpath))
+ err_flags |= MP_FAIL_PATH;
+ } else if (err_flags && pg->bypassed)
err_flags |= MP_FAIL_PATH;
if (err_flags & MP_FAIL_PATH)
@@ -996,7 +1052,7 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
bypass_pg(m, pg, 1);
spin_lock_irqsave(&m->lock, flags);
- if (err_flags) {
+ if (err_flags & ~MP_RETRY) {
m->current_pgpath = NULL;
m->current_pg = NULL;
} else if (!m->pg_init_required)
@@ -1148,11 +1204,15 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
/* Features */
if (type == STATUSTYPE_INFO)
- DMEMIT("1 %u ", m->queue_size);
- else if (m->queue_if_no_path)
- DMEMIT("1 queue_if_no_path ");
- else
- DMEMIT("0 ");
+ DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
+ else {
+ DMEMIT("%u ", m->queue_if_no_path +
+ (m->pg_init_retries > 0) * 2);
+ if (m->queue_if_no_path)
+ DMEMIT("queue_if_no_path ");
+ if (m->pg_init_retries)
+ DMEMIT("pg_init_retries %u ", m->pg_init_retries);
+ }
if (hwh->type && hwh->type->status)
sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
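
To illustrate how the new pieces fit together, a multipath table line combining the hp-sw hardware handler with the new pg_init_retries feature word might look roughly like this; device numbers, sizes and the path-group layout are invented for the example:

    echo "0 71014400 multipath 3 queue_if_no_path pg_init_retries 5 1 hp-sw 2 1 round-robin 0 1 1 8:16 1000 round-robin 0 1 1 8:32 1000" | dmsetup create mpath0

With such a table loaded, the STATUSTYPE_INFO output above reports the feature slot as "2 <queue_size> <pg_init_count>", and the table-format status echoes back "3 queue_if_no_path pg_init_retries 5".
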
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index f10a0c89b3f..ca1bb636a3e 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -94,12 +94,10 @@ out:
static struct ps_internal *_alloc_path_selector(struct path_selector_type *pst)
{
- struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL);
+ struct ps_internal *psi = kzalloc(sizeof(*psi), GFP_KERNEL);
- if (psi) {
- memset(psi, 0, sizeof(*psi));
+ if (psi)
psi->pst = *pst;
- }
return psi;
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d09ff15490a..31123d4a6b9 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -19,6 +19,7 @@
#include <linux/time.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
+#include <linux/log2.h>
#define DM_MSG_PREFIX "raid1"
#define DM_IO_PAGES 64
@@ -113,6 +114,7 @@ struct region {
* Mirror set structures.
*---------------------------------------------------------------*/
struct mirror {
+ struct mirror_set *ms;
atomic_t error_count;
struct dm_dev *dev;
sector_t offset;
@@ -974,6 +976,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
ti->error = "Error creating dirty region hash";
+ dm_io_client_destroy(ms->io_client);
kfree(ms);
return NULL;
}
@@ -994,7 +997,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
static inline int _check_region_size(struct dm_target *ti, uint32_t size)
{
- return !(size % (PAGE_SIZE >> 9) || (size & (size - 1)) ||
+ return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
size > ti->len);
}
@@ -1015,6 +1018,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
return -ENXIO;
}
+ ms->mirror[mirror].ms = ms;
ms->mirror[mirror].offset = offset;
return 0;
@@ -1163,16 +1167,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
if (!ms->kmirrord_wq) {
DMERR("couldn't start kmirrord");
- free_context(ms, ti, m);
- return -ENOMEM;
+ r = -ENOMEM;
+ goto err_free_context;
}
INIT_WORK(&ms->kmirrord_work, do_mirror);
r = parse_features(ms, argc, argv, &args_used);
- if (r) {
- free_context(ms, ti, ms->nr_mirrors);
- return r;
- }
+ if (r)
+ goto err_destroy_wq;
argv += args_used;
argc -= args_used;
@@ -1188,19 +1190,22 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (argc) {
ti->error = "Too many mirror arguments";
- free_context(ms, ti, ms->nr_mirrors);
- return -EINVAL;
+ r = -EINVAL;
+ goto err_destroy_wq;
}
r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
- if (r) {
- destroy_workqueue(ms->kmirrord_wq);
- free_context(ms, ti, ms->nr_mirrors);
- return r;
- }
+ if (r)
+ goto err_destroy_wq;
wake(ms);
return 0;
+
+err_destroy_wq:
+ destroy_workqueue(ms->kmirrord_wq);
+err_free_context:
+ free_context(ms, ti, ms->nr_mirrors);
+ return r;
}
static void mirror_dtr(struct dm_target *ti)
@@ -1302,7 +1307,7 @@ static void mirror_postsuspend(struct dm_target *ti)
wait_event(_kmirrord_recovery_stopped,
!atomic_read(&ms->rh.recovery_in_flight));
- if (log->type->suspend && log->type->suspend(log))
+ if (log->type->postsuspend && log->type->postsuspend(log))
/* FIXME: need better error handling */
DMWARN("log suspend failed");
}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 98a633f3d6b..cee16fadd9e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/log2.h>
#include "dm-snap.h"
#include "dm-bio-list.h"
@@ -415,7 +416,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
/* Check chunk_size is a power of 2 */
- if (chunk_size & (chunk_size - 1)) {
+ if (!is_power_of_2(chunk_size)) {
*error = "Chunk size is not a power of 2";
return -EINVAL;
}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 51f5e076001..969944a8aba 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -11,6 +11,7 @@
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
+#include <linux/log2.h>
#define DM_MSG_PREFIX "striped"
@@ -99,7 +100,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
/*
* chunk_size is a power of two
*/
- if (!chunk_size || (chunk_size & (chunk_size - 1)) ||
+ if (!is_power_of_2(chunk_size) ||
(chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) {
ti->error = "Invalid chunk size";
return -EINVAL;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2bcde5798b5..e298d8d11f2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -102,6 +102,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
lhs->seg_boundary_mask =
min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
+ lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
+
lhs->no_cluster |= rhs->no_cluster;
}
@@ -213,12 +215,11 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
int dm_table_create(struct dm_table **result, int mode,
unsigned num_targets, struct mapped_device *md)
{
- struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+ struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
if (!t)
return -ENOMEM;
- memset(t, 0, sizeof(*t));
INIT_LIST_HEAD(&t->devices);
atomic_set(&t->holders, 1);
@@ -567,6 +568,8 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
min_not_zero(rs->seg_boundary_mask,
q->seg_boundary_mask);
+ rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
+
rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
}
EXPORT_SYMBOL_GPL(dm_set_device_limits);
@@ -708,6 +711,8 @@ static void check_for_valid_limits(struct io_restrictions *rs)
rs->max_segment_size = MAX_SEGMENT_SIZE;
if (!rs->seg_boundary_mask)
rs->seg_boundary_mask = -1;
+ if (!rs->bounce_pfn)
+ rs->bounce_pfn = -1;
}
int dm_table_add_target(struct dm_table *t, const char *type,
@@ -892,6 +897,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
q->hardsect_size = t->limits.hardsect_size;
q->max_segment_size = t->limits.max_segment_size;
q->seg_boundary_mask = t->limits.seg_boundary_mask;
+ q->bounce_pfn = t->limits.bounce_pfn;
if (t->limits.no_cluster)
q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
else
@@ -994,38 +1000,10 @@ void dm_table_unplug_all(struct dm_table *t)
struct dm_dev *dd = list_entry(d, struct dm_dev, list);
struct request_queue *q = bdev_get_queue(dd->bdev);
- if (q->unplug_fn)
- q->unplug_fn(q);
+ blk_unplug(q);
}
}
-int dm_table_flush_all(struct dm_table *t)
-{
- struct list_head *d, *devices = dm_table_get_devices(t);
- int ret = 0;
- unsigned i;
-
- for (i = 0; i < t->num_targets; i++)
- if (t->targets[i].type->flush)
- t->targets[i].type->flush(&t->targets[i]);
-
- for (d = devices->next; d != devices; d = d->next) {
- struct dm_dev *dd = list_entry(d, struct dm_dev, list);
- struct request_queue *q = bdev_get_queue(dd->bdev);
- int err;
-
- if (!q->issue_flush_fn)
- err = -EOPNOTSUPP;
- else
- err = q->issue_flush_fn(q, dd->bdev->bd_disk, NULL);
-
- if (!ret)
- ret = err;
- }
-
- return ret;
-}
-
struct mapped_device *dm_table_get_md(struct dm_table *t)
{
dm_get(t->md);
@@ -1043,4 +1021,3 @@ EXPORT_SYMBOL(dm_table_get_md);
EXPORT_SYMBOL(dm_table_put);
EXPORT_SYMBOL(dm_table_get);
EXPORT_SYMBOL(dm_table_unplug_all);
-EXPORT_SYMBOL(dm_table_flush_all);
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 477a041a41c..835cf95b857 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -88,12 +88,10 @@ void dm_put_target_type(struct target_type *t)
static struct tt_internal *alloc_target(struct target_type *t)
{
- struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+ struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
- if (ti) {
- memset(ti, 0, sizeof(*ti));
+ if (ti)
ti->tt = *t;
- }
return ti;
}
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
new file mode 100644
index 00000000000..50377e5dc2a
--- /dev/null
+++ b/drivers/md/dm-uevent.c
@@ -0,0 +1,222 @@
+/*
+ * Device Mapper Uevent Support (dm-uevent)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/kobject.h>
+#include <linux/dm-ioctl.h>
+
+#include "dm.h"
+#include "dm-uevent.h"
+
+#define DM_MSG_PREFIX "uevent"
+
+static const struct {
+ enum dm_uevent_type type;
+ enum kobject_action action;
+ char *name;
+} _dm_uevent_type_names[] = {
+ {DM_UEVENT_PATH_FAILED, KOBJ_CHANGE, "PATH_FAILED"},
+ {DM_UEVENT_PATH_REINSTATED, KOBJ_CHANGE, "PATH_REINSTATED"},
+};
+
+static struct kmem_cache *_dm_event_cache;
+
+struct dm_uevent {
+ struct mapped_device *md;
+ enum kobject_action action;
+ struct kobj_uevent_env ku_env;
+ struct list_head elist;
+ char name[DM_NAME_LEN];
+ char uuid[DM_UUID_LEN];
+};
+
+static void dm_uevent_free(struct dm_uevent *event)
+{
+ kmem_cache_free(_dm_event_cache, event);
+}
+
+static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md)
+{
+ struct dm_uevent *event;
+
+ event = kmem_cache_zalloc(_dm_event_cache, GFP_ATOMIC);
+ if (!event)
+ return NULL;
+
+ INIT_LIST_HEAD(&event->elist);
+ event->md = md;
+
+ return event;
+}
+
+static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md,
+ struct dm_target *ti,
+ enum kobject_action action,
+ const char *dm_action,
+ const char *path,
+ unsigned nr_valid_paths)
+{
+ struct dm_uevent *event;
+
+ event = dm_uevent_alloc(md);
+ if (!event) {
+ DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__);
+ goto err_nomem;
+ }
+
+ event->action = action;
+
+ if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) {
+ DMERR("%s: add_uevent_var() for DM_TARGET failed",
+ __FUNCTION__);
+ goto err_add;
+ }
+
+ if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) {
+ DMERR("%s: add_uevent_var() for DM_ACTION failed",
+ __FUNCTION__);
+ goto err_add;
+ }
+
+ if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u",
+ dm_next_uevent_seq(md))) {
+ DMERR("%s: add_uevent_var() for DM_SEQNUM failed",
+ __FUNCTION__);
+ goto err_add;
+ }
+
+ if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) {
+ DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__);
+ goto err_add;
+ }
+
+ if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d",
+ nr_valid_paths)) {
+ DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed",
+ __FUNCTION__);
+ goto err_add;
+ }
+
+ return event;
+
+err_add:
+ dm_uevent_free(event);
+err_nomem:
+ return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * dm_send_uevents - send uevents for given list
+ *
+ * @events: list of events to send
+ * @kobj: kobject generating event
+ *
+ */
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+{
+ int r;
+ struct dm_uevent *event, *next;
+
+ list_for_each_entry_safe(event, next, events, elist) {
+ list_del_init(&event->elist);
+
+ /*
+ * Need to call dm_copy_name_and_uuid from here for now.
+ * Context of previous var adds and locking used for
+ * hash_cell not compatible.
+ */
+ if (dm_copy_name_and_uuid(event->md, event->name,
+ event->uuid)) {
+ DMERR("%s: dm_copy_name_and_uuid() failed",
+ __FUNCTION__);
+ goto uevent_free;
+ }
+
+ if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) {
+ DMERR("%s: add_uevent_var() for DM_NAME failed",
+ __FUNCTION__);
+ goto uevent_free;
+ }
+
+ if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) {
+ DMERR("%s: add_uevent_var() for DM_UUID failed",
+ __FUNCTION__);
+ goto uevent_free;
+ }
+
+ r = kobject_uevent_env(kobj, event->action, event->ku_env.envp);
+ if (r)
+ DMERR("%s: kobject_uevent_env failed", __FUNCTION__);
+uevent_free:
+ dm_uevent_free(event);
+ }
+}
+EXPORT_SYMBOL_GPL(dm_send_uevents);
+
+/**
+ * dm_path_uevent - called to create a new path event and queue it
+ *
+ * @event_type: path event type enum
+ * @ti: pointer to a dm_target
+ * @path: string containing pathname
+ * @nr_valid_paths: number of valid paths remaining
+ *
+ */
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+ const char *path, unsigned nr_valid_paths)
+{
+ struct mapped_device *md = dm_table_get_md(ti->table);
+ struct dm_uevent *event;
+
+ if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) {
+ DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type);
+ goto out;
+ }
+
+ event = dm_build_path_uevent(md, ti,
+ _dm_uevent_type_names[event_type].action,
+ _dm_uevent_type_names[event_type].name,
+ path, nr_valid_paths);
+ if (IS_ERR(event))
+ goto out;
+
+ dm_uevent_add(md, &event->elist);
+
+out:
+ dm_put(md);
+}
+EXPORT_SYMBOL_GPL(dm_path_uevent);
+
+int dm_uevent_init(void)
+{
+ _dm_event_cache = KMEM_CACHE(dm_uevent, 0);
+ if (!_dm_event_cache)
+ return -ENOMEM;
+
+ DMINFO("version 1.0.3");
+
+ return 0;
+}
+
+void dm_uevent_exit(void)
+{
+ kmem_cache_destroy(_dm_event_cache);
+}
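
On the userspace side, the environment variables assembled above (DM_TARGET, DM_ACTION, DM_SEQNUM, DM_PATH, DM_NR_VALID_PATHS, DM_NAME, DM_UUID) arrive with a KOBJ_CHANGE event on the mapped device, so a udev rule can react to path failures. A minimal, purely illustrative rule might be:

    ACTION=="change", ENV{DM_ACTION}=="PATH_FAILED", RUN+="/bin/logger -t dm-mpath path_failed $env{DM_NAME} $env{DM_PATH} $env{DM_NR_VALID_PATHS}"

The rule syntax is ordinary udev; only the DM_* variable names are defined by this patch, everything else in the rule is an example.
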
diff --git a/drivers/md/dm-uevent.h b/drivers/md/dm-uevent.h
new file mode 100644
index 00000000000..2eccc8bd671
--- /dev/null
+++ b/drivers/md/dm-uevent.h
@@ -0,0 +1,59 @@
+/*
+ * Device Mapper Uevent Support
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#ifndef DM_UEVENT_H
+#define DM_UEVENT_H
+
+enum dm_uevent_type {
+ DM_UEVENT_PATH_FAILED,
+ DM_UEVENT_PATH_REINSTATED,
+};
+
+#ifdef CONFIG_DM_UEVENT
+
+extern int dm_uevent_init(void);
+extern void dm_uevent_exit(void);
+extern void dm_send_uevents(struct list_head *events, struct kobject *kobj);
+extern void dm_path_uevent(enum dm_uevent_type event_type,
+ struct dm_target *ti, const char *path,
+ unsigned nr_valid_paths);
+
+#else
+
+static inline int dm_uevent_init(void)
+{
+ return 0;
+}
+static inline void dm_uevent_exit(void)
+{
+}
+static inline void dm_send_uevents(struct list_head *events,
+ struct kobject *kobj)
+{
+}
+static inline void dm_path_uevent(enum dm_uevent_type event_type,
+ struct dm_target *ti, const char *path,
+ unsigned nr_valid_paths)
+{
+}
+
+#endif /* CONFIG_DM_UEVENT */
+
+#endif /* DM_UEVENT_H */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 167765c4774..07cbbb8eb3e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -7,6 +7,7 @@
#include "dm.h"
#include "dm-bio-list.h"
+#include "dm-uevent.h"
#include <linux/init.h>
#include <linux/module.h>
@@ -112,6 +113,9 @@ struct mapped_device {
*/
atomic_t event_nr;
wait_queue_head_t eventq;
+ atomic_t uevent_seq;
+ struct list_head uevent_list;
+ spinlock_t uevent_lock; /* Protect access to uevent_list */
/*
* freeze/thaw support require holding onto a super block
@@ -143,11 +147,19 @@ static int __init local_init(void)
return -ENOMEM;
}
+ r = dm_uevent_init();
+ if (r) {
+ kmem_cache_destroy(_tio_cache);
+ kmem_cache_destroy(_io_cache);
+ return r;
+ }
+
_major = major;
r = register_blkdev(_major, _name);
if (r < 0) {
kmem_cache_destroy(_tio_cache);
kmem_cache_destroy(_io_cache);
+ dm_uevent_exit();
return r;
}
@@ -162,6 +174,7 @@ static void local_exit(void)
kmem_cache_destroy(_tio_cache);
kmem_cache_destroy(_io_cache);
unregister_blkdev(_major, _name);
+ dm_uevent_exit();
_major = 0;
@@ -751,15 +764,13 @@ static void __clone_and_map(struct clone_info *ci)
/*
* Split the bio into several clones.
*/
-static void __split_bio(struct mapped_device *md, struct bio *bio)
+static int __split_bio(struct mapped_device *md, struct bio *bio)
{
struct clone_info ci;
ci.map = dm_get_table(md);
- if (!ci.map) {
- bio_io_error(bio);
- return;
- }
+ if (unlikely(!ci.map))
+ return -EIO;
ci.md = md;
ci.bio = bio;
@@ -779,6 +790,8 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
/* drop the extra reference count */
dec_pending(ci.io, 0);
dm_table_put(ci.map);
+
+ return 0;
}
/*-----------------------------------------------------------------
* CRUD END
@@ -790,7 +803,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
*/
static int dm_request(struct request_queue *q, struct bio *bio)
{
- int r;
+ int r = -EIO;
int rw = bio_data_dir(bio);
struct mapped_device *md = q->queuedata;
@@ -815,18 +828,11 @@ static int dm_request(struct request_queue *q, struct bio *bio)
while (test_bit(DMF_BLOCK_IO, &md->flags)) {
up_read(&md->io_lock);
- if (bio_rw(bio) == READA) {
- bio_io_error(bio);
- return 0;
- }
+ if (bio_rw(bio) != READA)
+ r = queue_io(md, bio);
- r = queue_io(md, bio);
- if (r < 0) {
- bio_io_error(bio);
- return 0;
-
- } else if (r == 0)
- return 0; /* deferred successfully */
+ if (r <= 0)
+ goto out_req;
/*
* We're in a while loop, because someone could suspend
@@ -835,24 +841,14 @@ static int dm_request(struct request_queue *q, struct bio *bio)
down_read(&md->io_lock);
}
- __split_bio(md, bio);
+ r = __split_bio(md, bio);
up_read(&md->io_lock);
- return 0;
-}
-
-static int dm_flush_all(struct request_queue *q, struct gendisk *disk,
- sector_t *error_sector)
-{
- struct mapped_device *md = q->queuedata;
- struct dm_table *map = dm_get_table(md);
- int ret = -ENXIO;
- if (map) {
- ret = dm_table_flush_all(map);
- dm_table_put(map);
- }
+out_req:
+ if (r < 0)
+ bio_io_error(bio);
- return ret;
+ return 0;
}
static void dm_unplug_all(struct request_queue *q)
@@ -992,6 +988,9 @@ static struct mapped_device *alloc_dev(int minor)
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
atomic_set(&md->event_nr, 0);
+ atomic_set(&md->uevent_seq, 0);
+ INIT_LIST_HEAD(&md->uevent_list);
+ spin_lock_init(&md->uevent_lock);
md->queue = blk_alloc_queue(GFP_KERNEL);
if (!md->queue)
@@ -1003,7 +1002,6 @@ static struct mapped_device *alloc_dev(int minor)
blk_queue_make_request(md->queue, dm_request);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
- md->queue->issue_flush_fn = dm_flush_all;
md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
if (!md->io_pool)
@@ -1060,12 +1058,14 @@ static struct mapped_device *alloc_dev(int minor)
return NULL;
}
+static void unlock_fs(struct mapped_device *md);
+
static void free_dev(struct mapped_device *md)
{
int minor = md->disk->first_minor;
if (md->suspended_bdev) {
- thaw_bdev(md->suspended_bdev, NULL);
+ unlock_fs(md);
bdput(md->suspended_bdev);
}
mempool_destroy(md->tio_pool);
@@ -1089,8 +1089,16 @@ static void free_dev(struct mapped_device *md)
*/
static void event_callback(void *context)
{
+ unsigned long flags;
+ LIST_HEAD(uevents);
struct mapped_device *md = (struct mapped_device *) context;
+ spin_lock_irqsave(&md->uevent_lock, flags);
+ list_splice_init(&md->uevent_list, &uevents);
+ spin_unlock_irqrestore(&md->uevent_lock, flags);
+
+ dm_send_uevents(&uevents, &md->disk->kobj);
+
atomic_inc(&md->event_nr);
wake_up(&md->eventq);
}
@@ -1249,7 +1257,8 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
while (c) {
n = c->bi_next;
c->bi_next = NULL;
- __split_bio(md, c);
+ if (__split_bio(md, c))
+ bio_io_error(c);
c = n;
}
}
@@ -1507,6 +1516,11 @@ out:
/*-----------------------------------------------------------------
* Event notification.
*---------------------------------------------------------------*/
+uint32_t dm_next_uevent_seq(struct mapped_device *md)
+{
+ return atomic_add_return(1, &md->uevent_seq);
+}
+
uint32_t dm_get_event_nr(struct mapped_device *md)
{
return atomic_read(&md->event_nr);
@@ -1518,6 +1532,15 @@ int dm_wait_event(struct mapped_device *md, int event_nr)
(event_nr != atomic_read(&md->event_nr)));
}
+void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&md->uevent_lock, flags);
+ list_add(elist, &md->uevent_list);
+ spin_unlock_irqrestore(&md->uevent_lock, flags);
+}
+
/*
* The gendisk is only valid as long as you have a reference
* count on 'md'.
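Note: dm.c queues uevents on md->uevent_list under md->uevent_lock and drains them in event_callback() by splicing the list onto a stack-local head, so the kobject work happens with the spinlock dropped. A sketch of that drain idiom, mirroring the hunk above (the function name is hypothetical):

static void drain_uevents_sketch(struct mapped_device *md)
{
	unsigned long flags;
	LIST_HEAD(events);

	/* Detach the whole pending list while holding the lock... */
	spin_lock_irqsave(&md->uevent_lock, flags);
	list_splice_init(&md->uevent_list, &events);
	spin_unlock_irqrestore(&md->uevent_lock, flags);

	/* ...then emit the events outside the lock. */
	dm_send_uevents(&events, &md->disk->kobj);
}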
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 462ee652a89..4b3faa45277 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -111,7 +111,6 @@ void dm_table_postsuspend_targets(struct dm_table *t);
int dm_table_resume_targets(struct dm_table *t);
int dm_table_any_congested(struct dm_table *t, int bdi_bits);
void dm_table_unplug_all(struct dm_table *t);
-int dm_table_flush_all(struct dm_table *t);
/*-----------------------------------------------------------------
* A registry of target types.
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index 7e052378c47..f3831f31223 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -198,7 +198,7 @@ struct kcopyd_job {
* These fields are only used if the job has been split
* into more manageable parts.
*/
- struct semaphore lock;
+ struct mutex lock;
atomic_t sub_jobs;
sector_t progress;
};
@@ -456,7 +456,7 @@ static void segment_complete(int read_err,
sector_t count = 0;
struct kcopyd_job *job = (struct kcopyd_job *) context;
- down(&job->lock);
+ mutex_lock(&job->lock);
/* update the error */
if (read_err)
@@ -480,7 +480,7 @@ static void segment_complete(int read_err,
job->progress += count;
}
}
- up(&job->lock);
+ mutex_unlock(&job->lock);
if (count) {
int i;
@@ -562,7 +562,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
dispatch_job(job);
else {
- init_MUTEX(&job->lock);
+ mutex_init(&job->lock);
job->progress = 0;
split_job(job);
}
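Note: the kcopyd change is a plain semaphore-to-mutex conversion for the split-job bookkeeping, which appears to be taken only from process context. The resulting pattern, reduced to a sketch (struct and function names are illustrative):

#include <linux/mutex.h>

struct split_job_sketch {
	struct mutex lock;	/* was a struct semaphore; mutex_init()
				 * at setup replaces init_MUTEX() */
	sector_t progress;
};

static void account_progress(struct split_job_sketch *job, sector_t count)
{
	mutex_lock(&job->lock);		/* was: down(&job->lock) */
	job->progress += count;
	mutex_unlock(&job->lock);	/* was: up(&job->lock) */
}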
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 550148770bb..3dac1cfb818 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -87,30 +87,10 @@ static void linear_unplug(struct request_queue *q)
for (i=0; i < mddev->raid_disks; i++) {
struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
- if (r_queue->unplug_fn)
- r_queue->unplug_fn(r_queue);
+ blk_unplug(r_queue);
}
}
-static int linear_issue_flush(struct request_queue *q, struct gendisk *disk,
- sector_t *error_sector)
-{
- mddev_t *mddev = q->queuedata;
- linear_conf_t *conf = mddev_to_conf(mddev);
- int i, ret = 0;
-
- for (i=0; i < mddev->raid_disks && ret == 0; i++) {
- struct block_device *bdev = conf->disks[i].rdev->bdev;
- struct request_queue *r_queue = bdev_get_queue(bdev);
-
- if (!r_queue->issue_flush_fn)
- ret = -EOPNOTSUPP;
- else
- ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
- }
- return ret;
-}
-
static int linear_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -279,7 +259,6 @@ static int linear_run (mddev_t *mddev)
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
- mddev->queue->issue_flush_fn = linear_issue_flush;
mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
return 0;
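Note: the MD personalities used to open-code calls to each member queue's unplug hook, usually behind a NULL check; this series replaces that with the blk_unplug() helper, and the same substitution recurs in md.c, multipath.c, raid0.c, raid1.c, raid10.c and raid5.c below. The old and new forms, side by side in a sketch:

static void unplug_member_queue(struct request_queue *r_queue)
{
	/* Old open-coded form, removed throughout this series:
	 *
	 *	if (r_queue->unplug_fn)
	 *		r_queue->unplug_fn(r_queue);
	 *
	 * blk_unplug() is expected to perform the same check inside
	 * the block layer. */
	blk_unplug(r_queue);
}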
diff --git a/drivers/md/md.c b/drivers/md/md.c
index acf1b81b47c..cef9ebd5a04 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2714,7 +2714,7 @@ action_show(mddev_t *mddev, char *page)
{
char *type = "idle";
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) {
+ (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
type = "reshape";
else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
@@ -2833,6 +2833,12 @@ sync_max_store(mddev_t *mddev, const char *buf, size_t len)
static struct md_sysfs_entry md_sync_max =
__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
+static ssize_t
+degraded_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->degraded);
+}
+static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
static ssize_t
sync_speed_show(mddev_t *mddev, char *page)
@@ -2976,6 +2982,7 @@ static struct attribute *md_redundancy_attrs[] = {
&md_suspend_lo.attr,
&md_suspend_hi.attr,
&md_bitmap.attr,
+ &md_degraded.attr,
NULL,
};
static struct attribute_group md_redundancy_group = {
@@ -3463,7 +3470,6 @@ static int do_md_stop(mddev_t * mddev, int mode)
mddev->pers->stop(mddev);
mddev->queue->merge_bvec_fn = NULL;
mddev->queue->unplug_fn = NULL;
- mddev->queue->issue_flush_fn = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
if (mddev->pers->sync_request)
sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
@@ -4711,7 +4717,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
void md_unregister_thread(mdk_thread_t *thread)
{
- dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
+ dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
kthread_stop(thread->tsk);
kfree(thread);
@@ -5439,7 +5445,7 @@ void md_do_sync(mddev_t *mddev)
* about not overloading the IO subsystem. (things like an
* e2fsck being done on the RAID array should execute fast)
*/
- mddev->queue->unplug_fn(mddev->queue);
+ blk_unplug(mddev->queue);
cond_resched();
currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
@@ -5458,7 +5464,7 @@ void md_do_sync(mddev_t *mddev)
* this also signals 'finished resyncing' to md_stop
*/
out:
- mddev->queue->unplug_fn(mddev->queue);
+ blk_unplug(mddev->queue);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
@@ -5771,26 +5777,47 @@ static int __init md_init(void)
* Searches all registered partitions for autorun RAID arrays
* at boot time.
*/
-static dev_t detected_devices[128];
-static int dev_cnt;
+
+static LIST_HEAD(all_detected_devices);
+struct detected_devices_node {
+ struct list_head list;
+ dev_t dev;
+};
void md_autodetect_dev(dev_t dev)
{
- if (dev_cnt >= 0 && dev_cnt < 127)
- detected_devices[dev_cnt++] = dev;
+ struct detected_devices_node *node_detected_dev;
+
+ node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
+ if (node_detected_dev) {
+ node_detected_dev->dev = dev;
+ list_add_tail(&node_detected_dev->list, &all_detected_devices);
+ } else {
+ printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
+ ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
+ }
}
static void autostart_arrays(int part)
{
mdk_rdev_t *rdev;
- int i;
+ struct detected_devices_node *node_detected_dev;
+ dev_t dev;
+ int i_scanned, i_passed;
- printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
+ i_scanned = 0;
+ i_passed = 0;
- for (i = 0; i < dev_cnt; i++) {
- dev_t dev = detected_devices[i];
+ printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
+ while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
+ i_scanned++;
+ node_detected_dev = list_entry(all_detected_devices.next,
+ struct detected_devices_node, list);
+ list_del(&node_detected_dev->list);
+ dev = node_detected_dev->dev;
+ kfree(node_detected_dev);
rdev = md_import_device(dev,0, 90);
if (IS_ERR(rdev))
continue;
@@ -5800,8 +5827,11 @@ static void autostart_arrays(int part)
continue;
}
list_add(&rdev->same_set, &pending_raid_disks);
+ i_passed++;
}
- dev_cnt = 0;
+
+ printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
+ i_scanned, i_passed);
autorun_devices(part);
}
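Note: md_autodetect_dev() used to stop recording devices once its fixed array filled (127 entries); the list-based version above accepts any number of detected partitions, at the cost of a kzalloc() per device that can fail early in boot, hence the KERN_CRIT message. The FIFO drain in autostart_arrays(), reduced to a sketch (the function name is illustrative):

static void drain_detected_devices_sketch(void)
{
	while (!list_empty(&all_detected_devices)) {
		struct detected_devices_node *node =
			list_entry(all_detected_devices.next,
				   struct detected_devices_node, list);

		list_del(&node->list);
		/* the real loop copies node->dev out and hands it to
		 * md_import_device(dev, 0, 90) after freeing the node */
		kfree(node);
	}
}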
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index f2a63f394ad..eb631ebed68 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -125,8 +125,7 @@ static void unplug_slaves(mddev_t *mddev)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (r_queue->unplug_fn)
- r_queue->unplug_fn(r_queue);
+ blk_unplug(r_queue);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
@@ -194,35 +193,6 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
seq_printf (seq, "]");
}
-static int multipath_issue_flush(struct request_queue *q, struct gendisk *disk,
- sector_t *error_sector)
-{
- mddev_t *mddev = q->queuedata;
- multipath_conf_t *conf = mddev_to_conf(mddev);
- int i, ret = 0;
-
- rcu_read_lock();
- for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- struct block_device *bdev = rdev->bdev;
- struct request_queue *r_queue = bdev_get_queue(bdev);
-
- if (!r_queue->issue_flush_fn)
- ret = -EOPNOTSUPP;
- else {
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
- error_sector);
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- }
- rcu_read_unlock();
- return ret;
-}
static int multipath_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -527,7 +497,6 @@ static int multipath_run (mddev_t *mddev)
mddev->array_size = mddev->size;
mddev->queue->unplug_fn = multipath_unplug;
- mddev->queue->issue_flush_fn = multipath_issue_flush;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ef0da2d8495..f8e591708d1 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -35,31 +35,10 @@ static void raid0_unplug(struct request_queue *q)
for (i=0; i<mddev->raid_disks; i++) {
struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);
- if (r_queue->unplug_fn)
- r_queue->unplug_fn(r_queue);
+ blk_unplug(r_queue);
}
}
-static int raid0_issue_flush(struct request_queue *q, struct gendisk *disk,
- sector_t *error_sector)
-{
- mddev_t *mddev = q->queuedata;
- raid0_conf_t *conf = mddev_to_conf(mddev);
- mdk_rdev_t **devlist = conf->strip_zone[0].dev;
- int i, ret = 0;
-
- for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- struct block_device *bdev = devlist[i]->bdev;
- struct request_queue *r_queue = bdev_get_queue(bdev);
-
- if (!r_queue->issue_flush_fn)
- ret = -EOPNOTSUPP;
- else
- ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
- }
- return ret;
-}
-
static int raid0_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -250,7 +229,6 @@ static int create_strip_zones (mddev_t *mddev)
mddev->queue->unplug_fn = raid0_unplug;
- mddev->queue->issue_flush_fn = raid0_issue_flush;
mddev->queue->backing_dev_info.congested_fn = raid0_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
@@ -493,7 +471,7 @@ bad_map:
bio_io_error(bio);
return 0;
}
-
+
static void raid0_status (struct seq_file *seq, mddev_t *mddev)
{
#undef MD_DEBUG
@@ -501,18 +479,18 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev)
int j, k, h;
char b[BDEVNAME_SIZE];
raid0_conf_t *conf = mddev_to_conf(mddev);
-
+
h = 0;
for (j = 0; j < conf->nr_strip_zones; j++) {
seq_printf(seq, " z%d", j);
if (conf->hash_table[h] == conf->strip_zone+j)
- seq_printf("(h%d)", h++);
+ seq_printf(seq, "(h%d)", h++);
seq_printf(seq, "=[");
for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
- seq_printf (seq, "%s/", bdevname(
+ seq_printf(seq, "%s/", bdevname(
conf->strip_zone[j].dev[k]->bdev,b));
- seq_printf (seq, "] zo=%d do=%d s=%d\n",
+ seq_printf(seq, "] zo=%d do=%d s=%d\n",
conf->strip_zone[j].zone_offset,
conf->strip_zone[j].dev_offset,
conf->strip_zone[j].size);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6d03bea6fa5..4a69c416e04 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -9,7 +9,7 @@
*
* Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
*
- * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk>
+ * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk>
* Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
*
* Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
@@ -549,8 +549,7 @@ static void unplug_slaves(mddev_t *mddev)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (r_queue->unplug_fn)
- r_queue->unplug_fn(r_queue);
+ blk_unplug(r_queue);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
@@ -567,36 +566,6 @@ static void raid1_unplug(struct request_queue *q)
md_wakeup_thread(mddev->thread);
}
-static int raid1_issue_flush(struct request_queue *q, struct gendisk *disk,
- sector_t *error_sector)
-{
- mddev_t *mddev = q->queuedata;
- conf_t *conf = mddev_to_conf(mddev);
- int i, ret = 0;
-
- rcu_read_lock();
- for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- struct block_device *bdev = rdev->bdev;
- struct request_queue *r_queue = bdev_get_queue(bdev);
-
- if (!r_queue->issue_flush_fn)
- ret = -EOPNOTSUPP;
- else {
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
- error_sector);
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- }
- rcu_read_unlock();
- return ret;
-}
-
static int raid1_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -1244,7 +1213,8 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
j = 0;
if (j >= 0)
mddev->resync_mismatches += r1_bio->sectors;
- if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
+ if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
+ && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
sbio->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
} else {
@@ -1997,7 +1967,6 @@ static int run(mddev_t *mddev)
mddev->array_size = mddev->size;
mddev->queue->unplug_fn = raid1_unplug;
- mddev->queue->issue_flush_fn = raid1_issue_flush;
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 25a96c42bdb..5cdcc938620 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -593,8 +593,7 @@ static void unplug_slaves(mddev_t *mddev)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (r_queue->unplug_fn)
- r_queue->unplug_fn(r_queue);
+ blk_unplug(r_queue);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
@@ -611,36 +610,6 @@ static void raid10_unplug(struct request_queue *q)
md_wakeup_thread(mddev->thread);
}
-static int raid10_issue_flush(struct request_queue *q, struct gendisk *disk,
- sector_t *error_sector)
-{
- mddev_t *mddev = q->queuedata;
- conf_t *conf = mddev_to_conf(mddev);
- int i, ret = 0;
-
- rcu_read_lock();
- for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- struct block_device *bdev = rdev->bdev;
- struct request_queue *r_queue = bdev_get_queue(bdev);
-
- if (!r_queue->issue_flush_fn)
- ret = -EOPNOTSUPP;
- else {
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
- error_sector);
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- }
- rcu_read_unlock();
- return ret;
-}
-
static int raid10_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -2118,7 +2087,6 @@ static int run(mddev_t *mddev)
mddev->resync_max_sectors = size << conf->chunk_shift;
mddev->queue->unplug_fn = raid10_unplug;
- mddev->queue->issue_flush_fn = raid10_issue_flush;
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index caaca9e178b..a5aad8cad84 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -376,7 +376,12 @@ static unsigned long get_stripe_work(struct stripe_head *sh)
ack++;
sh->ops.count -= ack;
- BUG_ON(sh->ops.count < 0);
+ if (unlikely(sh->ops.count < 0)) {
+ printk(KERN_ERR "pending: %#lx ops.pending: %#lx ops.ack: %#lx "
+ "ops.complete: %#lx\n", pending, sh->ops.pending,
+ sh->ops.ack, sh->ops.complete);
+ BUG();
+ }
return pending;
}
@@ -550,8 +555,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
}
}
}
- clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
- clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+ set_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
return_io(return_bi);
@@ -684,7 +688,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
}
static struct dma_async_tx_descriptor *
-ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
+ unsigned long pending)
{
int disks = sh->disks;
int pd_idx = sh->pd_idx, i;
@@ -692,7 +697,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
/* check if prexor is active which means only process blocks
* that are part of a read-modify-write (Wantprexor)
*/
- int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+ int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
pr_debug("%s: stripe %llu\n", __FUNCTION__,
(unsigned long long)sh->sector);
@@ -769,7 +774,8 @@ static void ops_complete_write(void *stripe_head_ref)
}
static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
+ unsigned long pending)
{
/* kernel stack size limits the total number of disks */
int disks = sh->disks;
@@ -777,7 +783,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
int count = 0, pd_idx = sh->pd_idx, i;
struct page *xor_dest;
- int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+ int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
unsigned long flags;
dma_async_tx_callback callback;
@@ -804,7 +810,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
}
/* check whether this postxor is part of a write */
- callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ?
+ callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ?
ops_complete_write : ops_complete_postxor;
/* 1/ if we prexor'd then the dest is reused as a source
@@ -892,12 +898,12 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
tx = ops_run_prexor(sh, tx);
if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
- tx = ops_run_biodrain(sh, tx);
+ tx = ops_run_biodrain(sh, tx, pending);
overlap_clear++;
}
if (test_bit(STRIPE_OP_POSTXOR, &pending))
- ops_run_postxor(sh, tx);
+ ops_run_postxor(sh, tx, pending);
if (test_bit(STRIPE_OP_CHECK, &pending))
ops_run_check(sh);
@@ -2620,6 +2626,13 @@ static void handle_stripe5(struct stripe_head *sh)
s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
/* Now to look around and see what can be done */
+ /* clean-up completed biofill operations */
+ if (test_bit(STRIPE_OP_BIOFILL, &sh->ops.complete)) {
+ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
+ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
+ }
+
rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
@@ -3175,8 +3188,7 @@ static void unplug_slaves(mddev_t *mddev)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (r_queue->unplug_fn)
- r_queue->unplug_fn(r_queue);
+ blk_unplug(r_queue);
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
@@ -3204,36 +3216,6 @@ static void raid5_unplug_device(struct request_queue *q)
unplug_slaves(mddev);
}
-static int raid5_issue_flush(struct request_queue *q, struct gendisk *disk,
- sector_t *error_sector)
-{
- mddev_t *mddev = q->queuedata;
- raid5_conf_t *conf = mddev_to_conf(mddev);
- int i, ret = 0;
-
- rcu_read_lock();
- for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && !test_bit(Faulty, &rdev->flags)) {
- struct block_device *bdev = rdev->bdev;
- struct request_queue *r_queue = bdev_get_queue(bdev);
-
- if (!r_queue->issue_flush_fn)
- ret = -EOPNOTSUPP;
- else {
- atomic_inc(&rdev->nr_pending);
- rcu_read_unlock();
- ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
- error_sector);
- rdev_dec_pending(rdev, mddev);
- rcu_read_lock();
- }
- }
- }
- rcu_read_unlock();
- return ret;
-}
-
static int raid5_congested(void *data, int bits)
{
mddev_t *mddev = data;
@@ -4263,7 +4245,6 @@ static int run(mddev_t *mddev)
mdname(mddev));
mddev->queue->unplug_fn = raid5_unplug_device;
- mddev->queue->issue_flush_fn = raid5_issue_flush;
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
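Note: raid5_run_ops() now hands the snapshot of pending bits returned by get_stripe_work() down to ops_run_biodrain() and ops_run_postxor(), so those helpers decide from the value captured under the stripe lock instead of re-reading sh->ops.pending, which other contexts may still be modifying; ops_complete_biofill() likewise only marks completion, and handle_stripe5() clears the biofill bits later under the lock. A small sketch of the snapshot test (the helper name is hypothetical):

/* Decide from the captured snapshot, never from the live bitfield. */
static int stripe_wants_prexor(unsigned long pending)
{
	return test_bit(STRIPE_OP_PREXOR, &pending);
}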
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c
index 92657615657..77a6e4bf503 100644
--- a/drivers/md/raid6algos.c
+++ b/drivers/md/raid6algos.c
@@ -52,7 +52,7 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_intx16,
&raid6_intx32,
#endif
-#if defined(__i386__)
+#if defined(__i386__) && !defined(__arch_um__)
&raid6_mmxx1,
&raid6_mmxx2,
&raid6_sse1x1,
@@ -60,7 +60,7 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_sse2x1,
&raid6_sse2x2,
#endif
-#if defined(__x86_64__)
+#if defined(__x86_64__) && !defined(__arch_um__)
&raid6_sse2x1,
&raid6_sse2x2,
&raid6_sse2x4,
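Note: the !defined(__arch_um__) additions here, and the matching ones in raid6mmx.c, raid6sse1.c, raid6sse2.c and raid6x86.h below, keep the x86 MMX/SSE syndrome implementations out of User Mode Linux builds, where the x86-specific SIMD support code is not usable; the portable raid6_intx variants remain available there. The guard pattern in isolation:

#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
/* native x86 kernels only: SIMD syndrome routines are compiled here */
#endif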
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c
index 6181a5a3365..d4e4a1bd70a 100644
--- a/drivers/md/raid6mmx.c
+++ b/drivers/md/raid6mmx.c
@@ -16,7 +16,7 @@
* MMX implementation of RAID-6 syndrome functions
*/
-#if defined(__i386__)
+#if defined(__i386__) && !defined(__arch_um__)
#include "raid6.h"
#include "raid6x86.h"
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c
index f0a1ba8f40b..0666237276f 100644
--- a/drivers/md/raid6sse1.c
+++ b/drivers/md/raid6sse1.c
@@ -21,7 +21,7 @@
* worthwhile as a separate implementation.
*/
-#if defined(__i386__)
+#if defined(__i386__) && !defined(__arch_um__)
#include "raid6.h"
#include "raid6x86.h"
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c
index 0f019762a7c..b034ad86803 100644
--- a/drivers/md/raid6sse2.c
+++ b/drivers/md/raid6sse2.c
@@ -17,7 +17,7 @@
*
*/
-#if defined(__i386__) || defined(__x86_64__)
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
#include "raid6.h"
#include "raid6x86.h"
@@ -161,7 +161,7 @@ const struct raid6_calls raid6_sse2x2 = {
#endif
-#ifdef __x86_64__
+#if defined(__x86_64__) && !defined(__arch_um__)
/*
* Unrolled-by-4 SSE2 implementation
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h
index 9111950414f..99fea7a70ca 100644
--- a/drivers/md/raid6x86.h
+++ b/drivers/md/raid6x86.h
@@ -19,7 +19,7 @@
#ifndef LINUX_RAID_RAID6X86_H
#define LINUX_RAID_RAID6X86_H
-#if defined(__i386__) || defined(__x86_64__)
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
#ifdef __KERNEL__ /* Real code */