diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 12 | ||||
-rw-r--r-- | drivers/md/Makefile | 6 | ||||
-rw-r--r-- | drivers/md/dm-bio-list.h | 5 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 170 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 23 | ||||
-rw-r--r-- | drivers/md/dm-emc.c | 8 | ||||
-rw-r--r-- | drivers/md/dm-hw-handler.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-hw-handler.h | 1 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 40 | ||||
-rw-r--r-- | drivers/md/dm-log.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-log.h | 3 | ||||
-rw-r--r-- | drivers/md/dm-mpath-hp-sw.c | 248 | ||||
-rw-r--r-- | drivers/md/dm-mpath-rdac.c | 15 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 88 | ||||
-rw-r--r-- | drivers/md/dm-path-selector.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 35 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-target.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-uevent.c | 222 | ||||
-rw-r--r-- | drivers/md/dm-uevent.h | 59 | ||||
-rw-r--r-- | drivers/md/dm.c | 79 | ||||
-rw-r--r-- | drivers/md/kcopyd.c | 8 | ||||
-rw-r--r-- | drivers/md/md.c | 2 |
25 files changed, 882 insertions, 171 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 34a8c60a254..9b6fbf044fd 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -267,6 +267,12 @@ config DM_MULTIPATH_RDAC ---help--- Multipath support for LSI/Engenio RDAC. +config DM_MULTIPATH_HP + tristate "HP MSA multipath support (EXPERIMENTAL)" + depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL + ---help--- + Multipath support for HP MSA (Active/Passive) series hardware. + config DM_DELAY tristate "I/O delaying target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL @@ -276,4 +282,10 @@ config DM_DELAY If unsure, say N. +config DM_UEVENT + bool "DM uevents (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + Generate udev events for DM events. + endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index c49366cdc05..d9aa7edb878 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -8,6 +8,7 @@ dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-mirror-objs := dm-log.o dm-raid1.o dm-rdac-objs := dm-mpath-rdac.o +dm-hp-sw-objs := dm-mpath-hp-sw.o md-mod-objs := md.o bitmap.o raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ raid6int1.o raid6int2.o raid6int4.o \ @@ -35,6 +36,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o +obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o @@ -48,6 +50,10 @@ ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec -mabi=altivec endif +ifeq ($(CONFIG_DM_UEVENT),y) +dm-mod-objs += dm-uevent.o +endif + targets += raid6int1.c $(obj)/raid6int1.c: UNROLL := 1 $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h index 3f7b827649e..d4509be0fe6 100644 --- a/drivers/md/dm-bio-list.h +++ b/drivers/md/dm-bio-list.h @@ -21,11 +21,6 @@ static inline int bio_list_empty(const struct bio_list *bl) return bl->head == NULL; } -#define BIO_LIST_INIT { .head = NULL, .tail = NULL } - -#define BIO_LIST(bl) \ - struct bio_list bl = BIO_LIST_INIT - static inline void bio_list_init(struct bio_list *bl) { bl->head = bl->tail = NULL; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 64fee90bb68..b41f945df8a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -36,7 +36,6 @@ struct dm_crypt_io { struct work_struct work; atomic_t pending; int error; - int post_process; }; /* @@ -57,7 +56,7 @@ struct crypt_config; struct crypt_iv_operations { int (*ctr)(struct crypt_config *cc, struct dm_target *ti, - const char *opts); + const char *opts); void (*dtr)(struct crypt_config *cc); const char *(*status)(struct crypt_config *cc); int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector); @@ -80,6 +79,8 @@ struct crypt_config { mempool_t *page_pool; struct bio_set *bs; + struct workqueue_struct *io_queue; + struct workqueue_struct *crypt_queue; /* * crypto related data */ @@ -137,7 +138,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector) } static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, - const char *opts) + const char *opts) { struct crypto_cipher *essiv_tfm; struct crypto_hash *hash_tfm; @@ -175,6 +176,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, if (err) { ti->error = "Error calculating hash in ESSIV"; + kfree(salt); return err; } @@ -188,7 +190,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, if (crypto_cipher_blocksize(essiv_tfm) != crypto_blkcipher_ivsize(cc->tfm)) { ti->error = "Block size of ESSIV cipher does " - "not match IV size of block cipher"; + "not match IV size of block cipher"; crypto_free_cipher(essiv_tfm); kfree(salt); return -EINVAL; @@ -319,10 +321,10 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, return r; } -static void -crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx, - struct bio *bio_out, struct bio *bio_in, - sector_t sector, int write) +static void crypt_convert_init(struct crypt_config *cc, + struct convert_context *ctx, + struct bio *bio_out, struct bio *bio_in, + sector_t sector, int write) { ctx->bio_in = bio_in; ctx->bio_out = bio_out; @@ -338,7 +340,7 @@ crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx, * Encrypt / decrypt data from one bio to another one (can be the same one) */ static int crypt_convert(struct crypt_config *cc, - struct convert_context *ctx) + struct convert_context *ctx) { int r = 0; @@ -370,7 +372,7 @@ static int crypt_convert(struct crypt_config *cc, } r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length, - ctx->write, ctx->sector); + ctx->write, ctx->sector); if (r < 0) break; @@ -380,13 +382,13 @@ static int crypt_convert(struct crypt_config *cc, return r; } - static void dm_crypt_bio_destructor(struct bio *bio) - { +static void dm_crypt_bio_destructor(struct bio *bio) +{ struct dm_crypt_io *io = bio->bi_private; struct crypt_config *cc = io->target->private; bio_free(bio, cc->bs); - } +} /* * Generate a new unfragmented bio with the given size @@ -458,7 +460,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. */ -static void dec_pending(struct dm_crypt_io *io, int error) +static void crypt_dec_pending(struct dm_crypt_io *io, int error) { struct crypt_config *cc = (struct crypt_config *) io->target->private; @@ -474,18 +476,36 @@ static void dec_pending(struct dm_crypt_io *io, int error) } /* - * kcryptd: + * kcryptd/kcryptd_io: * * Needed because it would be very unwise to do decryption in an * interrupt context. + * + * kcryptd performs the actual encryption or decryption. + * + * kcryptd_io performs the IO submission. + * + * They must be separated as otherwise the final stages could be + * starved by new requests which can block in the first stages due + * to memory allocation. */ -static struct workqueue_struct *_kcryptd_workqueue; static void kcryptd_do_work(struct work_struct *work); +static void kcryptd_do_crypt(struct work_struct *work); static void kcryptd_queue_io(struct dm_crypt_io *io) { + struct crypt_config *cc = io->target->private; + INIT_WORK(&io->work, kcryptd_do_work); - queue_work(_kcryptd_workqueue, &io->work); + queue_work(cc->io_queue, &io->work); +} + +static void kcryptd_queue_crypt(struct dm_crypt_io *io) +{ + struct crypt_config *cc = io->target->private; + + INIT_WORK(&io->work, kcryptd_do_crypt); + queue_work(cc->crypt_queue, &io->work); } static void crypt_endio(struct bio *clone, int error) @@ -508,13 +528,12 @@ static void crypt_endio(struct bio *clone, int error) } bio_put(clone); - io->post_process = 1; - kcryptd_queue_io(io); + kcryptd_queue_crypt(io); return; out: bio_put(clone); - dec_pending(io, error); + crypt_dec_pending(io, error); } static void clone_init(struct dm_crypt_io *io, struct bio *clone) @@ -544,7 +563,7 @@ static void process_read(struct dm_crypt_io *io) */ clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs); if (unlikely(!clone)) { - dec_pending(io, -ENOMEM); + crypt_dec_pending(io, -ENOMEM); return; } @@ -579,7 +598,7 @@ static void process_write(struct dm_crypt_io *io) while (remaining) { clone = crypt_alloc_buffer(io, remaining); if (unlikely(!clone)) { - dec_pending(io, -ENOMEM); + crypt_dec_pending(io, -ENOMEM); return; } @@ -589,7 +608,7 @@ static void process_write(struct dm_crypt_io *io) if (unlikely(crypt_convert(cc, &ctx) < 0)) { crypt_free_buffer_pages(cc, clone); bio_put(clone); - dec_pending(io, -EIO); + crypt_dec_pending(io, -EIO); return; } @@ -624,17 +643,23 @@ static void process_read_endio(struct dm_crypt_io *io) crypt_convert_init(cc, &ctx, io->base_bio, io->base_bio, io->base_bio->bi_sector - io->target->begin, 0); - dec_pending(io, crypt_convert(cc, &ctx)); + crypt_dec_pending(io, crypt_convert(cc, &ctx)); } static void kcryptd_do_work(struct work_struct *work) { struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - if (io->post_process) - process_read_endio(io); - else if (bio_data_dir(io->base_bio) == READ) + if (bio_data_dir(io->base_bio) == READ) process_read(io); +} + +static void kcryptd_do_crypt(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + + if (bio_data_dir(io->base_bio) == READ) + process_read_endio(io); else process_write(io); } @@ -690,7 +715,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key) cc->key_size = key_size; /* initial settings */ if ((!key_size && strcmp(key, "-")) || - (key_size && crypt_decode_key(cc->key, key, key_size) < 0)) + (key_size && crypt_decode_key(cc->key, key, key_size) < 0)) return -EINVAL; set_bit(DM_CRYPT_KEY_VALID, &cc->flags); @@ -746,7 +771,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (crypt_set_key(cc, argv[1])) { ti->error = "Error decoding key"; - goto bad1; + goto bad_cipher; } /* Compatiblity mode for old dm-crypt cipher strings */ @@ -757,19 +782,19 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (strcmp(chainmode, "ecb") && !ivmode) { ti->error = "This chaining mode requires an IV mechanism"; - goto bad1; + goto bad_cipher; } - if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, - cipher) >= CRYPTO_MAX_ALG_NAME) { + if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", + chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) { ti->error = "Chain mode + cipher name is too long"; - goto bad1; + goto bad_cipher; } tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { ti->error = "Error allocating crypto tfm"; - goto bad1; + goto bad_cipher; } strcpy(cc->cipher, cipher); @@ -793,18 +818,18 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_gen_ops = &crypt_iv_null_ops; else { ti->error = "Invalid IV mode"; - goto bad2; + goto bad_ivmode; } if (cc->iv_gen_ops && cc->iv_gen_ops->ctr && cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0) - goto bad2; + goto bad_ivmode; cc->iv_size = crypto_blkcipher_ivsize(tfm); if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ cc->iv_size = max(cc->iv_size, - (unsigned int)(sizeof(u64) / sizeof(u8))); + (unsigned int)(sizeof(u64) / sizeof(u8))); else { if (cc->iv_gen_ops) { DMWARN("Selected cipher does not support IVs"); @@ -817,13 +842,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); if (!cc->io_pool) { ti->error = "Cannot allocate crypt io mempool"; - goto bad3; + goto bad_slab_pool; } cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); if (!cc->page_pool) { ti->error = "Cannot allocate page mempool"; - goto bad4; + goto bad_page_pool; } cc->bs = bioset_create(MIN_IOS, MIN_IOS); @@ -834,25 +859,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) { ti->error = "Error setting key"; - goto bad5; + goto bad_device; } if (sscanf(argv[2], "%llu", &tmpll) != 1) { ti->error = "Invalid iv_offset sector"; - goto bad5; + goto bad_device; } cc->iv_offset = tmpll; if (sscanf(argv[4], "%llu", &tmpll) != 1) { ti->error = "Invalid device sector"; - goto bad5; + goto bad_device; } cc->start = tmpll; if (dm_get_device(ti, argv[3], cc->start, ti->len, - dm_table_get_mode(ti->table), &cc->dev)) { + dm_table_get_mode(ti->table), &cc->dev)) { ti->error = "Device lookup failed"; - goto bad5; + goto bad_device; } if (ivmode && cc->iv_gen_ops) { @@ -861,27 +886,45 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL); if (!cc->iv_mode) { ti->error = "Error kmallocing iv_mode string"; - goto bad5; + goto bad_ivmode_string; } strcpy(cc->iv_mode, ivmode); } else cc->iv_mode = NULL; + cc->io_queue = create_singlethread_workqueue("kcryptd_io"); + if (!cc->io_queue) { + ti->error = "Couldn't create kcryptd io queue"; + goto bad_io_queue; + } + + cc->crypt_queue = create_singlethread_workqueue("kcryptd"); + if (!cc->crypt_queue) { + ti->error = "Couldn't create kcryptd queue"; + goto bad_crypt_queue; + } + ti->private = cc; return 0; -bad5: +bad_crypt_queue: + destroy_workqueue(cc->io_queue); +bad_io_queue: + kfree(cc->iv_mode); +bad_ivmode_string: + dm_put_device(ti, cc->dev); +bad_device: bioset_free(cc->bs); bad_bs: mempool_destroy(cc->page_pool); -bad4: +bad_page_pool: mempool_destroy(cc->io_pool); -bad3: +bad_slab_pool: if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); -bad2: +bad_ivmode: crypto_free_blkcipher(tfm); -bad1: +bad_cipher: /* Must zero key material before freeing */ memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); kfree(cc); @@ -892,7 +935,8 @@ static void crypt_dtr(struct dm_target *ti) { struct crypt_config *cc = (struct crypt_config *) ti->private; - flush_workqueue(_kcryptd_workqueue); + destroy_workqueue(cc->io_queue); + destroy_workqueue(cc->crypt_queue); bioset_free(cc->bs); mempool_destroy(cc->page_pool); @@ -918,9 +962,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, io = mempool_alloc(cc->io_pool, GFP_NOIO); io->target = ti; io->base_bio = bio; - io->error = io->post_process = 0; + io->error = 0; atomic_set(&io->pending, 0); - kcryptd_queue_io(io); + + if (bio_data_dir(io->base_bio) == READ) + kcryptd_queue_io(io); + else + kcryptd_queue_crypt(io); return DM_MAPIO_SUBMITTED; } @@ -1037,25 +1085,12 @@ static int __init dm_crypt_init(void) if (!_crypt_io_pool) return -ENOMEM; - _kcryptd_workqueue = create_workqueue("kcryptd"); - if (!_kcryptd_workqueue) { - r = -ENOMEM; - DMERR("couldn't create kcryptd"); - goto bad1; - } - r = dm_register_target(&crypt_target); if (r < 0) { DMERR("register failed %d", r); - goto bad2; + kmem_cache_destroy(_crypt_io_pool); } - return 0; - -bad2: - destroy_workqueue(_kcryptd_workqueue); -bad1: - kmem_cache_destroy(_crypt_io_pool); return r; } @@ -1066,7 +1101,6 @@ static void __exit dm_crypt_exit(void) if (r < 0) DMERR("unregister failed %d", r); - destroy_workqueue(_kcryptd_workqueue); kmem_cache_destroy(_crypt_io_pool); } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 6928c136d3c..bdd37f881c4 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -83,7 +83,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) struct dm_delay_info *delayed, *next; unsigned long next_expires = 0; int start_timer = 0; - BIO_LIST(flush_bios); + struct bio_list flush_bios = { }; mutex_lock(&delayed_bios_lock); list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { @@ -163,34 +163,32 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - if (argc == 3) { - dc->dev_write = NULL; + dc->dev_write = NULL; + if (argc == 3) goto out; - } if (sscanf(argv[4], "%llu", &tmpll) != 1) { ti->error = "Invalid write device sector"; - goto bad; + goto bad_dev_read; } dc->start_write = tmpll; if (sscanf(argv[5], "%u", &dc->write_delay) != 1) { ti->error = "Invalid write delay"; - goto bad; + goto bad_dev_read; } if (dm_get_device(ti, argv[3], dc->start_write, ti->len, dm_table_get_mode(ti->table), &dc->dev_write)) { ti->error = "Write device lookup failed"; - dm_put_device(ti, dc->dev_read); - goto bad; + goto bad_dev_read; } out: dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache); if (!dc->delayed_pool) { DMERR("Couldn't create delayed bio pool."); - goto bad; + goto bad_dev_write; } setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc); @@ -203,6 +201,11 @@ out: ti->private = dc; return 0; +bad_dev_write: + if (dc->dev_write) + dm_put_device(ti, dc->dev_write); +bad_dev_read: + dm_put_device(ti, dc->dev_read); bad: kfree(dc); return -EINVAL; @@ -305,7 +308,7 @@ static int delay_status(struct dm_target *ti, status_type_t type, (unsigned long long) dc->start_read, dc->read_delay); if (dc->dev_write) - DMEMIT("%s %llu %u", dc->dev_write->name, + DMEMIT(" %s %llu %u", dc->dev_write->name, (unsigned long long) dc->start_write, dc->write_delay); break; diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c index 342517261ec..6b91b9ab1d4 100644 --- a/drivers/md/dm-emc.c +++ b/drivers/md/dm-emc.c @@ -81,7 +81,7 @@ static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size) } if (bio_add_page(bio, page, data_size, 0) != data_size) { - DMERR("get_failover_bio: alloc_page() failed."); + DMERR("get_failover_bio: bio_add_page() failed."); __free_page(page); bio_put(bio); return NULL; @@ -211,12 +211,10 @@ fail_path: static struct emc_handler *alloc_emc_handler(void) { - struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL); + struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL); - if (h) { - memset(h, 0, sizeof(*h)); + if (h) spin_lock_init(&h->lock); - } return h; } diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c index baafaaba4d4..2ee84d8aa0b 100644 --- a/drivers/md/dm-hw-handler.c +++ b/drivers/md/dm-hw-handler.c @@ -91,12 +91,10 @@ void dm_put_hw_handler(struct hw_handler_type *hwht) static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht) { - struct hwh_internal *hwhi = kmalloc(sizeof(*hwhi), GFP_KERNEL); + struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL); - if (hwhi) { - memset(hwhi, 0, sizeof(*hwhi)); + if (hwhi) hwhi->hwht = *hwht; - } return hwhi; } diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h index e0832e6fcf3..46809dcb121 100644 --- a/drivers/md/dm-hw-handler.h +++ b/drivers/md/dm-hw-handler.h @@ -58,5 +58,6 @@ unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio); #define MP_FAIL_PATH 1 #define MP_BYPASS_PG 2 #define MP_ERROR_IO 4 /* Don't retry this I/O */ +#define MP_RETRY 8 #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b441d82c338..138200bf5e0 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -700,7 +700,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) int r; char *new_name = (char *) param + param->data_start; - if (new_name < (char *) (param + 1) || + if (new_name < (char *) param->data || invalid_str(new_name, (void *) param + param_size)) { DMWARN("Invalid new logical volume name supplied."); return -EINVAL; @@ -726,7 +726,7 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) if (!md) return -ENXIO; - if (geostr < (char *) (param + 1) || + if (geostr < (char *) param->data || invalid_str(geostr, (void *) param + param_size)) { DMWARN("Invalid geometry supplied."); goto out; @@ -1233,7 +1233,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size) if (r) goto out; - if (tmsg < (struct dm_target_msg *) (param + 1) || + if (tmsg < (struct dm_target_msg *) param->data || invalid_str(tmsg->message, (void *) param + param_size)) { DMWARN("Invalid target message parameters."); r = -EINVAL; @@ -1358,7 +1358,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) if (tmp.data_size < sizeof(tmp)) return -EINVAL; - dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); + dmi = vmalloc(tmp.data_size); if (!dmi) return -ENOMEM; @@ -1515,3 +1515,35 @@ void dm_interface_exit(void) dm_hash_exit(); } + +/** + * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers + * @md: Pointer to mapped_device + * @name: Buffer (size DM_NAME_LEN) for name + * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined + */ +int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid) +{ + int r = 0; + struct hash_cell *hc; + + if (!md) + return -ENXIO; + + dm_get(md); + down_read(&_hash_lock); + hc = dm_get_mdptr(md); + if (!hc || hc->md != md) { + r = -ENXIO; + goto out; + } + + strcpy(name, hc->name); + strcpy(uuid, hc->uuid ? : ""); + +out: + up_read(&_hash_lock); + dm_put(md); + + return r; +} diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index a66428d860f..072ee4353ea 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -696,7 +696,7 @@ static struct dirty_log_type _disk_type = { .module = THIS_MODULE, .ctr = disk_ctr, .dtr = disk_dtr, - .suspend = disk_flush, + .postsuspend = disk_flush, .resume = disk_resume, .get_region_size = core_get_region_size, .is_clean = core_is_clean, diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h index 86a301c8daf..3fae87eb596 100644 --- a/drivers/md/dm-log.h +++ b/drivers/md/dm-log.h @@ -32,7 +32,8 @@ struct dirty_log_type { * There are times when we don't want the log to touch * the disk. */ - int (*suspend)(struct dirty_log *log); + int (*presuspend)(struct dirty_log *log); + int (*postsuspend)(struct dirty_log *log); int (*resume)(struct dirty_log *log); /* diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c new file mode 100644 index 00000000000..204bf42c944 --- /dev/null +++ b/drivers/md/dm-mpath-hp-sw.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2005 Mike Christie, All rights reserved. + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. + * Authors: Mike Christie + * Dave Wysochanski + * + * This file is released under the GPL. + * + * This module implements the specific path activation code for + * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive) + * storage arrays. + * These storage arrays have controller-based failover, not + * LUN-based failover. However, LUN-based failover is the design + * of dm-multipath. Thus, this module is written for LUN-based failover. + */ +#include <linux/blkdev.h> +#include <linux/list.h> +#include <linux/types.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_dbg.h> + +#include "dm.h" +#include "dm-hw-handler.h" + +#define DM_MSG_PREFIX "multipath hp-sw" +#define DM_HP_HWH_NAME "hp-sw" +#define DM_HP_HWH_VER "1.0.0" + +struct hp_sw_context { + unsigned char sense[SCSI_SENSE_BUFFERSIZE]; +}; + +/* + * hp_sw_error_is_retryable - Is an HP-specific check condition retryable? + * @req: path activation request + * + * Examine error codes of request and determine whether the error is retryable. + * Some error codes are already retried by scsi-ml (see + * scsi_decide_disposition), but some HP specific codes are not. + * The intent of this routine is to supply the logic for the HP specific + * check conditions. + * + * Returns: + * 1 - command completed with retryable error + * 0 - command completed with non-retryable error + * + * Possible optimizations + * 1. More hardware-specific error codes + */ +static int hp_sw_error_is_retryable(struct request *req) +{ + /* + * NOT_READY is known to be retryable + * For now we just dump out the sense data and call it retryable + */ + if (status_byte(req->errors) == CHECK_CONDITION) + __scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len); + + /* + * At this point we don't have complete information about all the error + * codes from this hardware, so we are just conservative and retry + * when in doubt. + */ + return 1; +} + +/* + * hp_sw_end_io - Completion handler for HP path activation. + * @req: path activation request + * @error: scsi-ml error + * + * Check sense data, free request structure, and notify dm that + * pg initialization has completed. + * + * Context: scsi-ml softirq + * + */ +static void hp_sw_end_io(struct request *req, int error) +{ + struct dm_path *path = req->end_io_data; + unsigned err_flags = 0; + + if (!error) { + DMDEBUG("%s path activation command - success", + path->dev->name); + goto out; + } + + if (hp_sw_error_is_retryable(req)) { + DMDEBUG("%s path activation command - retry", + path->dev->name); + err_flags = MP_RETRY; + goto out; + } + + DMWARN("%s path activation fail - error=0x%x", + path->dev->name, error); + err_flags = MP_FAIL_PATH; + +out: + req->end_io_data = NULL; + __blk_put_request(req->q, req); + dm_pg_init_complete(path, err_flags); +} + +/* + * hp_sw_get_request - Allocate an HP specific path activation request + * @path: path on which request will be sent (needed for request queue) + * + * The START command is used for path activation request. + * These arrays are controller-based failover, not LUN based. + * One START command issued to a single path will fail over all + * LUNs for the same controller. + * + * Possible optimizations + * 1. Make timeout configurable + * 2. Preallocate request + */ +static struct request *hp_sw_get_request(struct dm_path *path) +{ + struct request *req; + struct block_device *bdev = path->dev->bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct hp_sw_context *h = path->hwhcontext; + + req = blk_get_request(q, WRITE, GFP_NOIO); + if (!req) + goto out; + + req->timeout = 60 * HZ; + + req->errors = 0; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; + req->end_io_data = path; + req->sense = h->sense; + memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); + + memset(&req->cmd, 0, BLK_MAX_CDB); + req->cmd[0] = START_STOP; + req->cmd[4] = 1; + req->cmd_len = COMMAND_SIZE(req->cmd[0]); + +out: + return req; +} + +/* + * hp_sw_pg_init - HP path activation implementation. + * @hwh: hardware handler specific data + * @bypassed: unused; is the path group bypassed? (see dm-mpath.c) + * @path: path to send initialization command + * + * Send an HP-specific path activation command on 'path'. + * Do not try to optimize in any way, just send the activation command. + * More than one path activation command may be sent to the same controller. + * This seems to work fine for basic failover support. + * + * Possible optimizations + * 1. Detect an in-progress activation request and avoid submitting another one + * 2. Model the controller and only send a single activation request at a time + * 3. Determine the state of a path before sending an activation request + * + * Context: kmpathd (see process_queued_ios() in dm-mpath.c) + */ +static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed, + struct dm_path *path) +{ + struct request *req; + struct hp_sw_context *h; + + path->hwhcontext = hwh->context; + h = hwh->context; + + req = hp_sw_get_request(path); + if (!req) { + DMERR("%s path activation command - allocation fail", + path->dev->name); + goto retry; + } + + DMDEBUG("%s path activation command - sent", path->dev->name); + + blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io); + return; + +retry: + dm_pg_init_complete(path, MP_RETRY); +} + +static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv) +{ + struct hp_sw_context *h; + + h = kmalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + hwh->context = h; + + return 0; +} + +static void hp_sw_destroy(struct hw_handler *hwh) +{ + struct hp_sw_context *h = hwh->context; + + kfree(h); +} + +static struct hw_handler_type hp_sw_hwh = { + .name = DM_HP_HWH_NAME, + .module = THIS_MODULE, + .create = hp_sw_create, + .destroy = hp_sw_destroy, + .pg_init = hp_sw_pg_init, +}; + +static int __init hp_sw_init(void) +{ + int r; + + r = dm_register_hw_handler(&hp_sw_hwh); + if (r < 0) + DMERR("register failed %d", r); + else + DMINFO("version " DM_HP_HWH_VER " loaded"); + + return r; +} + +static void __exit hp_sw_exit(void) +{ + int r; + + r = dm_unregister_hw_handler(&hp_sw_hwh); + if (r < 0) + DMERR("unregister failed %d", r); +} + +module_init(hp_sw_init); +module_exit(hp_sw_exit); + +MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support"); +MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DM_HP_HWH_VER); diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c index 16b16134577..e04eb5c697f 100644 --- a/drivers/md/dm-mpath-rdac.c +++ b/drivers/md/dm-mpath-rdac.c @@ -664,20 +664,21 @@ static struct hw_handler_type rdac_handler = { static int __init rdac_init(void) { - int r = dm_register_hw_handler(&rdac_handler); - - if (r < 0) { - DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); - return r; - } + int r; rdac_wkqd = create_singlethread_workqueue("rdac_wkqd"); if (!rdac_wkqd) { DMERR("Failed to create workqueue rdac_wkqd."); - dm_unregister_hw_handler(&rdac_handler); return -ENOMEM; } + r = dm_register_hw_handler(&rdac_handler); + if (r < 0) { + DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); + destroy_workqueue(rdac_wkqd); + return r; + } + DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER); return 0; } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 31056abca89..24b2b1e32fa 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -10,6 +10,7 @@ #include "dm-hw-handler.h" #include "dm-bio-list.h" #include "dm-bio-record.h" +#include "dm-uevent.h" #include <linux/ctype.h> #include <linux/init.h> @@ -75,6 +76,8 @@ struct multipath { unsigned queue_io; /* Must we queue all I/O? */ unsigned queue_if_no_path; /* Queue I/O if last path fails? */ unsigned saved_queue_if_no_path;/* Saved state during suspension */ + unsigned pg_init_retries; /* Number of times to retry pg_init */ + unsigned pg_init_count; /* Number of times pg_init called */ struct work_struct process_queued_ios; struct bio_list queued_ios; @@ -225,6 +228,8 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath) m->pg_init_required = 0; m->queue_io = 0; } + + m->pg_init_count = 0; } static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg) @@ -424,6 +429,7 @@ static void process_queued_ios(struct work_struct *work) must_queue = 0; if (m->pg_init_required && !m->pg_init_in_progress) { + m->pg_init_count++; m->pg_init_required = 0; m->pg_init_in_progress = 1; init_required = 1; @@ -689,9 +695,11 @@ static int parse_features(struct arg_set *as, struct multipath *m) int r; unsigned argc; struct dm_target *ti = m->ti; + const char *param_name; static struct param _params[] = { - {0, 1, "invalid number of feature args"}, + {0, 3, "invalid number of feature args"}, + {1, 50, "pg_init_retries must be between 1 and 50"}, }; r = read_param(_params, shift(as), &argc, &ti->error); @@ -701,12 +709,28 @@ static int parse_features(struct arg_set *as, struct multipath *m) if (!argc) return 0; - if (!strnicmp(shift(as), MESG_STR("queue_if_no_path"))) - return queue_if_no_path(m, 1, 0); - else { + do { + param_name = shift(as); + argc--; + + if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { + r = queue_if_no_path(m, 1, 0); + continue; + } + + if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && + (argc >= 1)) { + r = read_param(_params + 1, shift(as), + &m->pg_init_retries, &ti->error); + argc--; + continue; + } + ti->error = "Unrecognised multipath feature request"; - return -EINVAL; - } + r = -EINVAL; + } while (argc && !r); + + return r; } static int multipath_ctr(struct dm_target *ti, unsigned int argc, @@ -834,6 +858,9 @@ static int fail_path(struct pgpath *pgpath) if (pgpath == m->current_pgpath) m->current_pgpath = NULL; + dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, + pgpath->path.dev->name, m->nr_valid_paths); + queue_work(kmultipathd, &m->trigger_event); out: @@ -873,6 +900,9 @@ static int reinstate_path(struct pgpath *pgpath) if (!m->nr_valid_paths++ && m->queue_size) queue_work(kmultipathd, &m->process_queued_ios); + dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, + pgpath->path.dev->name, m->nr_valid_paths); + queue_work(kmultipathd, &m->trigger_event); out: @@ -976,6 +1006,26 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed) } /* + * Should we retry pg_init immediately? + */ +static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) +{ + unsigned long flags; + int limit_reached = 0; + + spin_lock_irqsave(&m->lock, flags); + + if (m->pg_init_count <= m->pg_init_retries) + m->pg_init_required = 1; + else + limit_reached = 1; + + spin_unlock_irqrestore(&m->lock, flags); + + return limit_reached; +} + +/* * pg_init must call this when it has completed its initialisation */ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) @@ -985,8 +1035,14 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) struct multipath *m = pg->m; unsigned long flags; - /* We insist on failing the path if the PG is already bypassed. */ - if (err_flags && pg->bypassed) + /* + * If requested, retry pg_init until maximum number of retries exceeded. + * If retry not requested and PG already bypassed, always fail the path. + */ + if (err_flags & MP_RETRY) { + if (pg_init_limit_reached(m, pgpath)) + err_flags |= MP_FAIL_PATH; + } else if (err_flags && pg->bypassed) err_flags |= MP_FAIL_PATH; if (err_flags & MP_FAIL_PATH) @@ -996,7 +1052,7 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) bypass_pg(m, pg, 1); spin_lock_irqsave(&m->lock, flags); - if (err_flags) { + if (err_flags & ~MP_RETRY) { m->current_pgpath = NULL; m->current_pg = NULL; } else if (!m->pg_init_required) @@ -1148,11 +1204,15 @@ static int multipath_status(struct dm_target *ti, status_type_t type, /* Features */ if (type == STATUSTYPE_INFO) - DMEMIT("1 %u ", m->queue_size); - else if (m->queue_if_no_path) - DMEMIT("1 queue_if_no_path "); - else - DMEMIT("0 "); + DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); + else { + DMEMIT("%u ", m->queue_if_no_path + + (m->pg_init_retries > 0) * 2); + if (m->queue_if_no_path) + DMEMIT("queue_if_no_path "); + if (m->pg_init_retries) + DMEMIT("pg_init_retries %u ", m->pg_init_retries); + } if (hwh->type && hwh->type->status) sz += hwh->type->status(hwh, type, result + sz, maxlen - sz); diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c index f10a0c89b3f..ca1bb636a3e 100644 --- a/drivers/md/dm-path-selector.c +++ b/drivers/md/dm-path-selector.c @@ -94,12 +94,10 @@ out: static struct ps_internal *_alloc_path_selector(struct path_selector_type *pst) { - struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL); + struct ps_internal *psi = kzalloc(sizeof(*psi), GFP_KERNEL); - if (psi) { - memset(psi, 0, sizeof(*psi)); + if (psi) psi->pst = *pst; - } return psi; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d09ff15490a..31123d4a6b9 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -19,6 +19,7 @@ #include <linux/time.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> +#include <linux/log2.h> #define DM_MSG_PREFIX "raid1" #define DM_IO_PAGES 64 @@ -113,6 +114,7 @@ struct region { * Mirror set structures. *---------------------------------------------------------------*/ struct mirror { + struct mirror_set *ms; atomic_t error_count; struct dm_dev *dev; sector_t offset; @@ -974,6 +976,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { ti->error = "Error creating dirty region hash"; + dm_io_client_destroy(ms->io_client); kfree(ms); return NULL; } @@ -994,7 +997,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, static inline int _check_region_size(struct dm_target *ti, uint32_t size) { - return !(size % (PAGE_SIZE >> 9) || (size & (size - 1)) || + return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) || size > ti->len); } @@ -1015,6 +1018,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, return -ENXIO; } + ms->mirror[mirror].ms = ms; ms->mirror[mirror].offset = offset; return 0; @@ -1163,16 +1167,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ms->kmirrord_wq = create_singlethread_workqueue("kmirrord"); if (!ms->kmirrord_wq) { DMERR("couldn't start kmirrord"); - free_context(ms, ti, m); - return -ENOMEM; + r = -ENOMEM; + goto err_free_context; } INIT_WORK(&ms->kmirrord_work, do_mirror); r = parse_features(ms, argc, argv, &args_used); - if (r) { - free_context(ms, ti, ms->nr_mirrors); - return r; - } + if (r) + goto err_destroy_wq; argv += args_used; argc -= args_used; @@ -1188,19 +1190,22 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (argc) { ti->error = "Too many mirror arguments"; - free_context(ms, ti, ms->nr_mirrors); - return -EINVAL; + r = -EINVAL; + goto err_destroy_wq; } r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); - if (r) { - destroy_workqueue(ms->kmirrord_wq); - free_context(ms, ti, ms->nr_mirrors); - return r; - } + if (r) + goto err_destroy_wq; wake(ms); return 0; + +err_destroy_wq: + destroy_workqueue(ms->kmirrord_wq); +err_free_context: + free_context(ms, ti, ms->nr_mirrors); + return r; } static void mirror_dtr(struct dm_target *ti) @@ -1302,7 +1307,7 @@ static void mirror_postsuspend(struct dm_target *ti) wait_event(_kmirrord_recovery_stopped, !atomic_read(&ms->rh.recovery_in_flight)); - if (log->type->suspend && log->type->suspend(log)) + if (log->type->postsuspend && log->type->postsuspend(log)) /* FIXME: need better error handling */ DMWARN("log suspend failed"); } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 98a633f3d6b..cee16fadd9e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/log2.h> #include "dm-snap.h" #include "dm-bio-list.h" @@ -415,7 +416,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, chunk_size = round_up(chunk_size, PAGE_SIZE >> 9); /* Check chunk_size is a power of 2 */ - if (chunk_size & (chunk_size - 1)) { + if (!is_power_of_2(chunk_size)) { *error = "Chunk size is not a power of 2"; return -EINVAL; } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 51f5e076001..969944a8aba 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -11,6 +11,7 @@ #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/slab.h> +#include <linux/log2.h> #define DM_MSG_PREFIX "striped" @@ -99,7 +100,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * chunk_size is a power of two */ - if (!chunk_size || (chunk_size & (chunk_size - 1)) || + if (!is_power_of_2(chunk_size) || (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { ti->error = "Invalid chunk size"; return -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index fbe477bb2c6..8939e610508 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -213,12 +213,11 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, int mode, unsigned num_targets, struct mapped_device *md) { - struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; - memset(t, 0, sizeof(*t)); INIT_LIST_HEAD(&t->devices); atomic_set(&t->holders, 1); diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 477a041a41c..835cf95b857 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -88,12 +88,10 @@ void dm_put_target_type(struct target_type *t) static struct tt_internal *alloc_target(struct target_type *t) { - struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); + struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL); - if (ti) { - memset(ti, 0, sizeof(*ti)); + if (ti) ti->tt = *t; - } return ti; } diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c new file mode 100644 index 00000000000..50377e5dc2a --- /dev/null +++ b/drivers/md/dm-uevent.c @@ -0,0 +1,222 @@ +/* + * Device Mapper Uevent Support (dm-uevent) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2007 + * Author: Mike Anderson <andmike@linux.vnet.ibm.com> + */ +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/kobject.h> +#include <linux/dm-ioctl.h> + +#include "dm.h" +#include "dm-uevent.h" + +#define DM_MSG_PREFIX "uevent" + +static const struct { + enum dm_uevent_type type; + enum kobject_action action; + char *name; +} _dm_uevent_type_names[] = { + {DM_UEVENT_PATH_FAILED, KOBJ_CHANGE, "PATH_FAILED"}, + {DM_UEVENT_PATH_REINSTATED, KOBJ_CHANGE, "PATH_REINSTATED"}, +}; + +static struct kmem_cache *_dm_event_cache; + +struct dm_uevent { + struct mapped_device *md; + enum kobject_action action; + struct kobj_uevent_env ku_env; + struct list_head elist; + char name[DM_NAME_LEN]; + char uuid[DM_UUID_LEN]; +}; + +static void dm_uevent_free(struct dm_uevent *event) +{ + kmem_cache_free(_dm_event_cache, event); +} + +static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md) +{ + struct dm_uevent *event; + + event = kmem_cache_zalloc(_dm_event_cache, GFP_ATOMIC); + if (!event) + return NULL; + + INIT_LIST_HEAD(&event->elist); + event->md = md; + + return event; +} + +static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md, + struct dm_target *ti, + enum kobject_action action, + const char *dm_action, + const char *path, + unsigned nr_valid_paths) +{ + struct dm_uevent *event; + + event = dm_uevent_alloc(md); + if (!event) { + DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__); + goto err_nomem; + } + + event->action = action; + + if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) { + DMERR("%s: add_uevent_var() for DM_TARGET failed", + __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) { + DMERR("%s: add_uevent_var() for DM_ACTION failed", + __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u", + dm_next_uevent_seq(md))) { + DMERR("%s: add_uevent_var() for DM_SEQNUM failed", + __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) { + DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d", + nr_valid_paths)) { + DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed", + __FUNCTION__); + goto err_add; + } + + return event; + +err_add: + dm_uevent_free(event); +err_nomem: + return ERR_PTR(-ENOMEM); +} + +/** + * dm_send_uevents - send uevents for given list + * + * @events: list of events to send + * @kobj: kobject generating event + * + */ +void dm_send_uevents(struct list_head *events, struct kobject *kobj) +{ + int r; + struct dm_uevent *event, *next; + + list_for_each_entry_safe(event, next, events, elist) { + list_del_init(&event->elist); + + /* + * Need to call dm_copy_name_and_uuid from here for now. + * Context of previous var adds and locking used for + * hash_cell not compatable. + */ + if (dm_copy_name_and_uuid(event->md, event->name, + event->uuid)) { + DMERR("%s: dm_copy_name_and_uuid() failed", + __FUNCTION__); + goto uevent_free; + } + + if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) { + DMERR("%s: add_uevent_var() for DM_NAME failed", + __FUNCTION__); + goto uevent_free; + } + + if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) { + DMERR("%s: add_uevent_var() for DM_UUID failed", + __FUNCTION__); + goto uevent_free; + } + + r = kobject_uevent_env(kobj, event->action, event->ku_env.envp); + if (r) + DMERR("%s: kobject_uevent_env failed", __FUNCTION__); +uevent_free: + dm_uevent_free(event); + } +} +EXPORT_SYMBOL_GPL(dm_send_uevents); + +/** + * dm_path_uevent - called to create a new path event and queue it + * + * @event_type: path event type enum + * @ti: pointer to a dm_target + * @path: string containing pathname + * @nr_valid_paths: number of valid paths remaining + * + */ +void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, + const char *path, unsigned nr_valid_paths) +{ + struct mapped_device *md = dm_table_get_md(ti->table); + struct dm_uevent *event; + + if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) { + DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type); + goto out; + } + + event = dm_build_path_uevent(md, ti, + _dm_uevent_type_names[event_type].action, + _dm_uevent_type_names[event_type].name, + path, nr_valid_paths); + if (IS_ERR(event)) + goto out; + + dm_uevent_add(md, &event->elist); + +out: + dm_put(md); +} +EXPORT_SYMBOL_GPL(dm_path_uevent); + +int dm_uevent_init(void) +{ + _dm_event_cache = KMEM_CACHE(dm_uevent, 0); + if (!_dm_event_cache) + return -ENOMEM; + + DMINFO("version 1.0.3"); + + return 0; +} + +void dm_uevent_exit(void) +{ + kmem_cache_destroy(_dm_event_cache); +} diff --git a/drivers/md/dm-uevent.h b/drivers/md/dm-uevent.h new file mode 100644 index 00000000000..2eccc8bd671 --- /dev/null +++ b/drivers/md/dm-uevent.h @@ -0,0 +1,59 @@ +/* + * Device Mapper Uevent Support + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2007 + * Author: Mike Anderson <andmike@linux.vnet.ibm.com> + */ +#ifndef DM_UEVENT_H +#define DM_UEVENT_H + +enum dm_uevent_type { + DM_UEVENT_PATH_FAILED, + DM_UEVENT_PATH_REINSTATED, +}; + +#ifdef CONFIG_DM_UEVENT + +extern int dm_uevent_init(void); +extern void dm_uevent_exit(void); +extern void dm_send_uevents(struct list_head *events, struct kobject *kobj); +extern void dm_path_uevent(enum dm_uevent_type event_type, + struct dm_target *ti, const char *path, + unsigned nr_valid_paths); + +#else + +static inline int dm_uevent_init(void) +{ + return 0; +} +static inline void dm_uevent_exit(void) +{ +} +static inline void dm_send_uevents(struct list_head *events, + struct kobject *kobj) +{ +} +static inline void dm_path_uevent(enum dm_uevent_type event_type, + struct dm_target *ti, const char *path, + unsigned nr_valid_paths) +{ +} + +#endif /* CONFIG_DM_UEVENT */ + +#endif /* DM_UEVENT_H */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d837d37f620..07cbbb8eb3e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -7,6 +7,7 @@ #include "dm.h" #include "dm-bio-list.h" +#include "dm-uevent.h" #include <linux/init.h> #include <linux/module.h> @@ -112,6 +113,9 @@ struct mapped_device { */ atomic_t event_nr; wait_queue_head_t eventq; + atomic_t uevent_seq; + struct list_head uevent_list; + spinlock_t uevent_lock; /* Protect access to uevent_list */ /* * freeze/thaw support require holding onto a super block @@ -143,11 +147,19 @@ static int __init local_init(void) return -ENOMEM; } + r = dm_uevent_init(); + if (r) { + kmem_cache_destroy(_tio_cache); + kmem_cache_destroy(_io_cache); + return r; + } + _major = major; r = register_blkdev(_major, _name); if (r < 0) { kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); + dm_uevent_exit(); return r; } @@ -162,6 +174,7 @@ static void local_exit(void) kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); + dm_uevent_exit(); _major = 0; @@ -751,15 +764,13 @@ static void __clone_and_map(struct clone_info *ci) /* * Split the bio into several clones. */ -static void __split_bio(struct mapped_device *md, struct bio *bio) +static int __split_bio(struct mapped_device *md, struct bio *bio) { struct clone_info ci; ci.map = dm_get_table(md); - if (!ci.map) { - bio_io_error(bio); - return; - } + if (unlikely(!ci.map)) + return -EIO; ci.md = md; ci.bio = bio; @@ -779,6 +790,8 @@ static void __split_bio(struct mapped_device *md, struct bio *bio) /* drop the extra reference count */ dec_pending(ci.io, 0); dm_table_put(ci.map); + + return 0; } /*----------------------------------------------------------------- * CRUD END @@ -790,7 +803,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio) */ static int dm_request(struct request_queue *q, struct bio *bio) { - int r; + int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; @@ -815,18 +828,11 @@ static int dm_request(struct request_queue *q, struct bio *bio) while (test_bit(DMF_BLOCK_IO, &md->flags)) { up_read(&md->io_lock); - if (bio_rw(bio) == READA) { - bio_io_error(bio); - return 0; - } - - r = queue_io(md, bio); - if (r < 0) { - bio_io_error(bio); - return 0; + if (bio_rw(bio) != READA) + r = queue_io(md, bio); - } else if (r == 0) - return 0; /* deferred successfully */ + if (r <= 0) + goto out_req; /* * We're in a while loop, because someone could suspend @@ -835,8 +841,13 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); } - __split_bio(md, bio); + r = __split_bio(md, bio); up_read(&md->io_lock); + +out_req: + if (r < 0) + bio_io_error(bio); + return 0; } @@ -977,6 +988,9 @@ static struct mapped_device *alloc_dev(int minor) atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); + atomic_set(&md->uevent_seq, 0); + INIT_LIST_HEAD(&md->uevent_list); + spin_lock_init(&md->uevent_lock); md->queue = blk_alloc_queue(GFP_KERNEL); if (!md->queue) @@ -1044,12 +1058,14 @@ static struct mapped_device *alloc_dev(int minor) return NULL; } +static void unlock_fs(struct mapped_device *md); + static void free_dev(struct mapped_device *md) { int minor = md->disk->first_minor; if (md->suspended_bdev) { - thaw_bdev(md->suspended_bdev, NULL); + unlock_fs(md); bdput(md->suspended_bdev); } mempool_destroy(md->tio_pool); @@ -1073,8 +1089,16 @@ static void free_dev(struct mapped_device *md) */ static void event_callback(void *context) { + unsigned long flags; + LIST_HEAD(uevents); struct mapped_device *md = (struct mapped_device *) context; + spin_lock_irqsave(&md->uevent_lock, flags); + list_splice_init(&md->uevent_list, &uevents); + spin_unlock_irqrestore(&md->uevent_lock, flags); + + dm_send_uevents(&uevents, &md->disk->kobj); + atomic_inc(&md->event_nr); wake_up(&md->eventq); } @@ -1233,7 +1257,8 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c) while (c) { n = c->bi_next; c->bi_next = NULL; - __split_bio(md, c); + if (__split_bio(md, c)) + bio_io_error(c); c = n; } } @@ -1491,6 +1516,11 @@ out: /*----------------------------------------------------------------- * Event notification. *---------------------------------------------------------------*/ +uint32_t dm_next_uevent_seq(struct mapped_device *md) +{ + return atomic_add_return(1, &md->uevent_seq); +} + uint32_t dm_get_event_nr(struct mapped_device *md) { return atomic_read(&md->event_nr); @@ -1502,6 +1532,15 @@ int dm_wait_event(struct mapped_device *md, int event_nr) (event_nr != atomic_read(&md->event_nr))); } +void dm_uevent_add(struct mapped_device *md, struct list_head *elist) +{ + unsigned long flags; + + spin_lock_irqsave(&md->uevent_lock, flags); + list_add(elist, &md->uevent_list); + spin_unlock_irqrestore(&md->uevent_lock, flags); +} + /* * The gendisk is only valid as long as you have a reference * count on 'md'. diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 7e052378c47..f3831f31223 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -198,7 +198,7 @@ struct kcopyd_job { * These fields are only used if the job has been split * into more manageable parts. */ - struct semaphore lock; + struct mutex lock; atomic_t sub_jobs; sector_t progress; }; @@ -456,7 +456,7 @@ static void segment_complete(int read_err, sector_t count = 0; struct kcopyd_job *job = (struct kcopyd_job *) context; - down(&job->lock); + mutex_lock(&job->lock); /* update the error */ if (read_err) @@ -480,7 +480,7 @@ static void segment_complete(int read_err, job->progress += count; } } - up(&job->lock); + mutex_unlock(&job->lock); if (count) { int i; @@ -562,7 +562,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, dispatch_job(job); else { - init_MUTEX(&job->lock); + mutex_init(&job->lock); job->progress = 0; split_job(job); } diff --git a/drivers/md/md.c b/drivers/md/md.c index c059ae6f37e..808cd954945 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4717,7 +4717,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, void md_unregister_thread(mdk_thread_t *thread) { - dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); + dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); kthread_stop(thread->tsk); kfree(thread); |