From e09b457bdb7e8d23fc54dcef0930ac697d8de895 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:17 +0100 Subject: block: simplify holder symlink handling Code to manage symlinks in /sys/block/*/{holders|slaves} are overly complex with multiple holder considerations, redundant extra references to all involved kobjects, unused generic kobject holder support and unnecessary mixup with bd_claim/release functionalities. Strip it down to what's necessary (single gendisk holder) and make it use a separate interface. This is a step for cleaning up bd_claim/release. This patch makes dm-table slightly more complex but it will be simplified again with further changes. Signed-off-by: Tejun Heo Acked-by: Neil Brown Acked-by: Mike Snitzer Cc: dm-devel@redhat.com --- drivers/md/dm-table.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 90267f8d64e..2c876ffc63d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -328,12 +328,22 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, bdev = open_by_devnum(dev, d->dm_dev.mode); if (IS_ERR(bdev)) return PTR_ERR(bdev); - r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); - if (r) + + r = bd_claim(bdev, _claim_ptr); + if (r) { blkdev_put(bdev, d->dm_dev.mode); - else - d->dm_dev.bdev = bdev; - return r; + return r; + } + + r = bd_link_disk_holder(bdev, dm_disk(md)); + if (r) { + bd_release(bdev); + blkdev_put(bdev, d->dm_dev.mode); + return r; + } + + d->dm_dev.bdev = bdev; + return 0; } /* @@ -344,7 +354,8 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) if (!d->dm_dev.bdev) return; - bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); + bd_unlink_disk_holder(d->dm_dev.bdev); + bd_release(d->dm_dev.bdev); blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); d->dm_dev.bdev = NULL; } -- cgit v1.2.3-70-g09d2 From e525fd89d380c4a94c0d63913a1dd1a593ed25e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:17 +0100 Subject: block: make blkdev_get/put() handle exclusive access Over time, block layer has accumulated a set of APIs dealing with bdev open, close, claim and release. * blkdev_get/put() are the primary open and close functions. * bd_claim/release() deal with exclusive open. * open/close_bdev_exclusive() are combination of open and claim and the other way around, respectively. * bd_link/unlink_disk_holder() to create and remove holder/slave symlinks. * open_by_devnum() wraps bdget() + blkdev_get(). The interface is a bit confusing and the decoupling of open and claim makes it impossible to properly guarantee exclusive access as in-kernel open + claim sequence can disturb the existing exclusive open even before the block layer knows the current open if for another exclusive access. Reorganize the interface such that, * blkdev_get() is extended to include exclusive access management. @holder argument is added and, if is @FMODE_EXCL specified, it will gain exclusive access atomically w.r.t. other exclusive accesses. * blkdev_put() is similarly extended. It now takes @mode argument and if @FMODE_EXCL is set, it releases an exclusive access. Also, when the last exclusive claim is released, the holder/slave symlinks are removed automatically. * bd_claim/release() and close_bdev_exclusive() are no longer necessary and either made static or removed. * bd_link_disk_holder() remains the same but bd_unlink_disk_holder() is no longer necessary and removed. * open_bdev_exclusive() becomes a simple wrapper around lookup_bdev() and blkdev_get(). It also has an unexpected extra bdev_read_only() test which probably should be moved into blkdev_get(). * open_by_devnum() is modified to take @holder argument and pass it to blkdev_get(). Most of bdev open/close operations are unified into blkdev_get/put() and most exclusive accesses are tested atomically at the open time (as it should). This cleans up code and removes some, both valid and invalid, but unnecessary all the same, corner cases. open_bdev_exclusive() and open_by_devnum() can use further cleanup - rename to blkdev_get_by_path() and blkdev_get_by_devt() and drop special features. Well, let's leave them for another day. Most conversions are straight-forward. drbd conversion is a bit more involved as there was some reordering, but the logic should stay the same. Signed-off-by: Tejun Heo Acked-by: Neil Brown Acked-by: Ryusuke Konishi Acked-by: Mike Snitzer Acked-by: Philipp Reisner Cc: Peter Osterlund Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Jan Kara Cc: Andrew Morton Cc: Andreas Dilger Cc: "Theodore Ts'o" Cc: Mark Fasheh Cc: Joel Becker Cc: Alex Elder Cc: Christoph Hellwig Cc: dm-devel@redhat.com Cc: drbd-dev@lists.linbit.com Cc: Leo Chen Cc: Scott Branden Cc: Chris Mason Cc: Steven Whitehouse Cc: Dave Kleikamp Cc: Joern Engel Cc: reiserfs-devel@vger.kernel.org Cc: Alexander Viro --- block/ioctl.c | 5 +- drivers/block/drbd/drbd_int.h | 2 - drivers/block/drbd/drbd_main.c | 7 +- drivers/block/drbd/drbd_nl.c | 103 ++++++++++----------------- drivers/block/pktcdvd.c | 22 +++--- drivers/char/raw.c | 14 ++-- drivers/md/dm-table.c | 15 +--- drivers/md/md.c | 14 +--- drivers/mtd/devices/block2mtd.c | 17 ++--- drivers/s390/block/dasd_genhd.c | 2 +- fs/block_dev.c | 149 ++++++++++++++-------------------------- fs/btrfs/volumes.c | 14 ++-- fs/ext3/super.c | 12 +--- fs/ext4/super.c | 12 +--- fs/gfs2/ops_fstype.c | 4 +- fs/jfs/jfs_logmgr.c | 17 ++--- fs/logfs/dev_bdev.c | 4 +- fs/nilfs2/super.c | 4 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/partitions/check.c | 2 +- fs/reiserfs/journal.c | 17 ++--- fs/super.c | 14 ++-- fs/xfs/linux-2.6/xfs_super.c | 2 +- include/linux/fs.h | 14 ++-- kernel/power/swap.c | 5 +- mm/swapfile.c | 7 +- 26 files changed, 162 insertions(+), 318 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/block/ioctl.c b/block/ioctl.c index d724ceb1d46..cc46d499fd2 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return -EINVAL; if (get_user(n, (int __user *) arg)) return -EFAULT; - if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0) + if (!(mode & FMODE_EXCL) && + blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) return -EBUSY; ret = set_blocksize(bdev, n); if (!(mode & FMODE_EXCL)) - bd_release(bdev); + blkdev_put(bdev, mode | FMODE_EXCL); return ret; case BLKPG: ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9bdcf4393c0..0590b9f67ec 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -923,8 +923,6 @@ struct drbd_md { struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; - struct file *lo_file; - struct file *md_file; struct drbd_md md; struct disk_conf dc; /* The user provided config... */ sector_t known_size; /* last known size of that backing device */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 25c7a73c506..7ec1a82064a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3361,11 +3361,8 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) if (ldev == NULL) return; - bd_release(ldev->backing_bdev); - bd_release(ldev->md_bdev); - - fput(ldev->lo_file); - fput(ldev->md_file); + blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); kfree(ldev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 87925e97e61..fd034609028 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -855,7 +855,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp sector_t max_possible_sectors; sector_t min_md_device_sectors; struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ - struct inode *inode, *inode2; + struct block_device *bdev; struct lru_cache *resync_lru = NULL; union drbd_state ns, os; unsigned int max_seg_s; @@ -902,46 +902,40 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } } - nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); - if (IS_ERR(nbc->lo_file)) { + bdev = open_bdev_exclusive(nbc->dc.backing_dev, + FMODE_READ | FMODE_WRITE, mdev); + if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, - PTR_ERR(nbc->lo_file)); - nbc->lo_file = NULL; + PTR_ERR(bdev)); retcode = ERR_OPEN_DISK; goto fail; } + nbc->backing_bdev = bdev; - inode = nbc->lo_file->f_dentry->d_inode; - - if (!S_ISBLK(inode->i_mode)) { - retcode = ERR_DISK_NOT_BDEV; - goto fail; - } - - nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); - if (IS_ERR(nbc->md_file)) { + /* + * meta_dev_idx >= 0: external fixed size, possibly multiple + * drbd sharing one meta device. TODO in that case, paranoia + * check that [md_bdev, meta_dev_idx] is not yet used by some + * other drbd minor! (if you use drbd.conf + drbdadm, that + * should check it for you already; but if you don't, or + * someone fooled it, we need to double check here) + */ + bdev = open_bdev_exclusive(nbc->dc.meta_dev, + FMODE_READ | FMODE_WRITE, + (nbc->dc.meta_dev_idx < 0) ? + (void *)mdev : (void *)drbd_m_holder); + if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, - PTR_ERR(nbc->md_file)); - nbc->md_file = NULL; + PTR_ERR(bdev)); retcode = ERR_OPEN_MD_DISK; goto fail; } + nbc->md_bdev = bdev; - inode2 = nbc->md_file->f_dentry->d_inode; - - if (!S_ISBLK(inode2->i_mode)) { - retcode = ERR_MD_NOT_BDEV; - goto fail; - } - - nbc->backing_bdev = inode->i_bdev; - if (bd_claim(nbc->backing_bdev, mdev)) { - printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n", - nbc->backing_bdev, mdev, - nbc->backing_bdev->bd_holder, - nbc->backing_bdev->bd_contains->bd_holder, - nbc->backing_bdev->bd_holders); - retcode = ERR_BDCLAIM_DISK; + if ((nbc->backing_bdev == nbc->md_bdev) != + (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { + retcode = ERR_MD_IDX_INVALID; goto fail; } @@ -950,28 +944,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; - goto release_bdev_fail; - } - - /* meta_dev_idx >= 0: external fixed size, - * possibly multiple drbd sharing one meta device. - * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is - * not yet used by some other drbd minor! - * (if you use drbd.conf + drbdadm, - * that should check it for you already; but if you don't, or someone - * fooled it, we need to double check here) */ - nbc->md_bdev = inode2->i_bdev; - if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev - : (void *) drbd_m_holder)) { - retcode = ERR_BDCLAIM_MD_DISK; - goto release_bdev_fail; - } - - if ((nbc->backing_bdev == nbc->md_bdev) != - (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || - nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { - retcode = ERR_MD_IDX_INVALID; - goto release_bdev2_fail; + goto fail; } /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ @@ -982,7 +955,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) nbc->dc.disk_size); retcode = ERR_DISK_TO_SMALL; - goto release_bdev2_fail; + goto fail; } if (nbc->dc.meta_dev_idx < 0) { @@ -999,7 +972,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); - goto release_bdev2_fail; + goto fail; } /* Make sure the new disk is big enough @@ -1007,7 +980,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { retcode = ERR_DISK_TO_SMALL; - goto release_bdev2_fail; + goto fail; } nbc->known_size = drbd_get_capacity(nbc->backing_bdev); @@ -1030,7 +1003,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); drbd_resume_io(mdev); if (retcode < SS_SUCCESS) - goto release_bdev2_fail; + goto fail; if (!get_ldev_if_state(mdev, D_ATTACHING)) goto force_diskless; @@ -1264,18 +1237,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp force_diskless: drbd_force_state(mdev, NS(disk, D_DISKLESS)); drbd_md_sync(mdev); - release_bdev2_fail: - if (nbc) - bd_release(nbc->md_bdev); - release_bdev_fail: - if (nbc) - bd_release(nbc->backing_bdev); fail: if (nbc) { - if (nbc->lo_file) - fput(nbc->lo_file); - if (nbc->md_file) - fput(nbc->md_file); + if (nbc->backing_bdev) + blkdev_put(nbc->backing_bdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL); + if (nbc->md_bdev) + blkdev_put(nbc->md_bdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL); kfree(nbc); } lc_destroy(resync_lru); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 19b3568e932..77d70eebb6b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2296,15 +2296,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) * so bdget() can't fail. */ bdget(pd->bdev->bd_dev); - if ((ret = blkdev_get(pd->bdev, FMODE_READ))) + if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd))) goto out; - if ((ret = bd_claim(pd->bdev, pd))) - goto out_putdev; - if ((ret = pkt_get_last_written(pd, &lba))) { printk(DRIVER_NAME": pkt_get_last_written failed\n"); - goto out_unclaim; + goto out_putdev; } set_capacity(pd->disk, lba << 2); @@ -2314,7 +2311,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) q = bdev_get_queue(pd->bdev); if (write) { if ((ret = pkt_open_write(pd))) - goto out_unclaim; + goto out_putdev; /* * Some CDRW drives can not handle writes larger than one packet, * even if the size is a multiple of the packet size. @@ -2329,23 +2326,21 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) } if ((ret = pkt_set_segment_merging(pd, q))) - goto out_unclaim; + goto out_putdev; if (write) { if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { printk(DRIVER_NAME": not enough memory for buffers\n"); ret = -ENOMEM; - goto out_unclaim; + goto out_putdev; } printk(DRIVER_NAME": %lukB available on disc\n", lba << 1); } return 0; -out_unclaim: - bd_release(pd->bdev); out_putdev: - blkdev_put(pd->bdev, FMODE_READ); + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); out: return ret; } @@ -2362,8 +2357,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_lock_door(pd, 0); pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); - bd_release(pd->bdev); - blkdev_put(pd->bdev, FMODE_READ); + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); pkt_shrink_pktlist(pd); } @@ -2733,7 +2727,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) bdev = bdget(dev); if (!bdev) return -ENOMEM; - ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY); + ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); if (ret) return ret; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index bfe25ea9766..b4b9d5a4788 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -65,15 +65,12 @@ static int raw_open(struct inode *inode, struct file *filp) if (!bdev) goto out; igrab(bdev->bd_inode); - err = blkdev_get(bdev, filp->f_mode); + err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open); if (err) goto out; - err = bd_claim(bdev, raw_open); - if (err) - goto out1; err = set_blocksize(bdev, bdev_logical_block_size(bdev)); if (err) - goto out2; + goto out1; filp->f_flags |= O_DIRECT; filp->f_mapping = bdev->bd_inode->i_mapping; if (++raw_devices[minor].inuse == 1) @@ -83,10 +80,8 @@ static int raw_open(struct inode *inode, struct file *filp) mutex_unlock(&raw_mutex); return 0; -out2: - bd_release(bdev); out1: - blkdev_put(bdev, filp->f_mode); + blkdev_put(bdev, filp->f_mode | FMODE_EXCL); out: mutex_unlock(&raw_mutex); return err; @@ -110,8 +105,7 @@ static int raw_release(struct inode *inode, struct file *filp) } mutex_unlock(&raw_mutex); - bd_release(bdev); - blkdev_put(bdev, filp->f_mode); + blkdev_put(bdev, filp->f_mode | FMODE_EXCL); return 0; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2c876ffc63d..9e88ca0c55e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -325,20 +325,13 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, BUG_ON(d->dm_dev.bdev); - bdev = open_by_devnum(dev, d->dm_dev.mode); + bdev = open_by_devnum(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); if (IS_ERR(bdev)) return PTR_ERR(bdev); - r = bd_claim(bdev, _claim_ptr); - if (r) { - blkdev_put(bdev, d->dm_dev.mode); - return r; - } - r = bd_link_disk_holder(bdev, dm_disk(md)); if (r) { - bd_release(bdev); - blkdev_put(bdev, d->dm_dev.mode); + blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL); return r; } @@ -354,9 +347,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) if (!d->dm_dev.bdev) return; - bd_unlink_disk_holder(d->dm_dev.bdev); - bd_release(d->dm_dev.bdev); - blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); + blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); d->dm_dev.bdev = NULL; } diff --git a/drivers/md/md.c b/drivers/md/md.c index c47644fca1a..6af951ffe0b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1907,7 +1907,6 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } - bd_unlink_disk_holder(rdev->bdev); list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; @@ -1935,19 +1934,13 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + shared ? (mdk_rdev_t *)lock_rdev : rdev); if (IS_ERR(bdev)) { printk(KERN_ERR "md: could not open %s.\n", __bdevname(dev, b)); return PTR_ERR(bdev); } - err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev); - if (err) { - printk(KERN_ERR "md: could not bd_claim %s.\n", - bdevname(bdev, b)); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); - return err; - } if (!shared) set_bit(AllReserved, &rdev->flags); rdev->bdev = bdev; @@ -1960,8 +1953,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) rdev->bdev = NULL; if (!bdev) MD_BUG(); - bd_release(bdev); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } void md_autodetect_dev(dev_t dev); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index a9e2d3b38ae..aa557beb8f5 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -224,7 +224,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) if (dev->blkdev) { invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 0, -1); - close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE); + blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } kfree(dev); @@ -234,7 +234,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev) /* FIXME: ensure that mtd->size % erase_size == 0 */ static struct block2mtd_dev *add_device(char *devname, int erase_size) { - const fmode_t mode = FMODE_READ | FMODE_WRITE; + const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; struct block_device *bdev; struct block2mtd_dev *dev; char *name; @@ -255,17 +255,8 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) to resolve the device name by other means. */ dev_t devt = name_to_dev_t(devname); - if (devt) { - bdev = open_by_devnum(devt, mode); - if (!IS_ERR(bdev)) { - int ret; - ret = bd_claim(bdev, dev); - if (ret) { - blkdev_put(bdev, mode); - bdev = ERR_PTR(ret); - } - } - } + if (devt) + bdev = open_by_devnum(devt, mode, dev); } #endif diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 30a1ca3d08b..5505bc07e1e 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -103,7 +103,7 @@ int dasd_scan_partitions(struct dasd_block *block) struct block_device *bdev; bdev = bdget_disk(block->gdp, 0); - if (!bdev || blkdev_get(bdev, FMODE_READ) < 0) + if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0) return -ENODEV; /* * See fs/partition/check.c:register_disk,rescan_partitions diff --git a/fs/block_dev.c b/fs/block_dev.c index 9329068684d..fc48912354d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -660,7 +660,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, else if (bdev->bd_contains == bdev) return true; /* is a whole device which isn't held */ - else if (whole->bd_holder == bd_claim) + else if (whole->bd_holder == bd_may_claim) return true; /* is a partition of a device that is being partitioned */ else if (whole->bd_holder != NULL) return false; /* is a partition of a held device */ @@ -807,10 +807,10 @@ static void __bd_claim(struct block_device *bdev, struct block_device *whole, { /* note that for a whole device bd_holders * will be incremented twice, and bd_holder will - * be set to bd_claim before being set to holder + * be set to bd_may_claim before being set to holder */ whole->bd_holders++; - whole->bd_holder = bd_claim; + whole->bd_holder = bd_may_claim; bdev->bd_holders++; bdev->bd_holder = holder; } @@ -835,37 +835,7 @@ static void bd_finish_claiming(struct block_device *bdev, __bd_abort_claiming(whole, holder); /* not actually an abort */ } -/** - * bd_claim - claim a block device - * @bdev: block device to claim - * @holder: holder trying to claim @bdev - * - * Try to claim @bdev which must have been opened successfully. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * 0 if successful, -EBUSY if @bdev is already claimed. - */ -int bd_claim(struct block_device *bdev, void *holder) -{ - struct block_device *whole = bdev->bd_contains; - int res; - - might_sleep(); - - spin_lock(&bdev_lock); - res = bd_prepare_to_claim(bdev, whole, holder); - if (res == 0) - __bd_claim(bdev, whole, holder); - spin_unlock(&bdev_lock); - - return res; -} -EXPORT_SYMBOL(bd_claim); - -void bd_release(struct block_device *bdev) +static void bd_release(struct block_device *bdev) { spin_lock(&bdev_lock); if (!--bdev->bd_contains->bd_holders) @@ -875,8 +845,6 @@ void bd_release(struct block_device *bdev) spin_unlock(&bdev_lock); } -EXPORT_SYMBOL(bd_release); - #ifdef CONFIG_SYSFS static int add_symlink(struct kobject *from, struct kobject *to) { @@ -943,7 +911,7 @@ out_unlock: } EXPORT_SYMBOL_GPL(bd_link_disk_holder); -void bd_unlink_disk_holder(struct block_device *bdev) +static void bd_unlink_disk_holder(struct block_device *bdev) { struct gendisk *disk = bdev->bd_holder_disk; @@ -954,7 +922,9 @@ void bd_unlink_disk_holder(struct block_device *bdev) del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); } -EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); +#else +static inline void bd_unlink_disk_holder(struct block_device *bdev) +{ } #endif /* @@ -964,12 +934,12 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); * to be used for internal purposes. If you ever need it - reconsider * your API. */ -struct block_device *open_by_devnum(dev_t dev, fmode_t mode) +struct block_device *open_by_devnum(dev_t dev, fmode_t mode, void *holder) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; if (bdev) - err = blkdev_get(bdev, mode); + err = blkdev_get(bdev, mode, holder); return err ? ERR_PTR(err) : bdev; } @@ -1235,17 +1205,37 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } -int blkdev_get(struct block_device *bdev, fmode_t mode) +int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { - return __blkdev_get(bdev, mode, 0); + struct block_device *whole = NULL; + int res; + + WARN_ON_ONCE((mode & FMODE_EXCL) && !holder); + + if ((mode & FMODE_EXCL) && holder) { + whole = bd_start_claiming(bdev, holder); + if (IS_ERR(whole)) { + bdput(bdev); + return PTR_ERR(whole); + } + } + + res = __blkdev_get(bdev, mode, 0); + + if (whole) { + if (res == 0) + bd_finish_claiming(bdev, whole, holder); + else + bd_abort_claiming(whole, holder); + } + + return res; } EXPORT_SYMBOL(blkdev_get); static int blkdev_open(struct inode * inode, struct file * filp) { - struct block_device *whole = NULL; struct block_device *bdev; - int res; /* * Preserve backwards compatibility and allow large file access @@ -1266,26 +1256,9 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (bdev == NULL) return -ENOMEM; - if (filp->f_mode & FMODE_EXCL) { - whole = bd_start_claiming(bdev, filp); - if (IS_ERR(whole)) { - bdput(bdev); - return PTR_ERR(whole); - } - } - filp->f_mapping = bdev->bd_inode->i_mapping; - res = blkdev_get(bdev, filp->f_mode); - - if (whole) { - if (res == 0) - bd_finish_claiming(bdev, whole, filp); - else - bd_abort_claiming(whole, filp); - } - - return res; + return blkdev_get(bdev, filp->f_mode, filp); } static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) @@ -1329,6 +1302,13 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) int blkdev_put(struct block_device *bdev, fmode_t mode) { + if (mode & FMODE_EXCL) { + mutex_lock(&bdev->bd_mutex); + bd_release(bdev); + if (!bdev->bd_holders) + bd_unlink_disk_holder(bdev); + mutex_unlock(&bdev->bd_mutex); + } return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); @@ -1336,8 +1316,7 @@ EXPORT_SYMBOL(blkdev_put); static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); - if (bdev->bd_holder == filp) - bd_release(bdev); + return blkdev_put(bdev, filp->f_mode); } @@ -1494,55 +1473,27 @@ EXPORT_SYMBOL(lookup_bdev); */ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { - struct block_device *bdev, *whole; + struct block_device *bdev; int error; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; - whole = bd_start_claiming(bdev, holder); - if (IS_ERR(whole)) { - bdput(bdev); - return whole; - } - - error = blkdev_get(bdev, mode); + error = blkdev_get(bdev, mode | FMODE_EXCL, holder); if (error) - goto out_abort_claiming; + return ERR_PTR(error); - error = -EACCES; - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) - goto out_blkdev_put; + if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { + blkdev_put(bdev, mode); + return ERR_PTR(-EACCES); + } - bd_finish_claiming(bdev, whole, holder); return bdev; - -out_blkdev_put: - blkdev_put(bdev, mode); -out_abort_claiming: - bd_abort_claiming(whole, holder); - return ERR_PTR(error); } EXPORT_SYMBOL(open_bdev_exclusive); -/** - * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() - * - * @bdev: blockdevice to close - * @mode: mode, must match that used to open. - * - * This is the counterpart to open_bdev_exclusive(). - */ -void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) -{ - bd_release(bdev); - blkdev_put(bdev, mode); -} - -EXPORT_SYMBOL(close_bdev_exclusive); - int __invalidate_device(struct block_device *bdev) { struct super_block *sb = get_super(bdev); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d39596224d2..f1b729d3b88 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -489,7 +489,7 @@ again: continue; if (device->bdev) { - close_bdev_exclusive(device->bdev, device->mode); + blkdev_put(device->bdev, device->mode | FMODE_EXCL); device->bdev = NULL; fs_devices->open_devices--; } @@ -523,7 +523,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev) { - close_bdev_exclusive(device->bdev, device->mode); + blkdev_put(device->bdev, device->mode | FMODE_EXCL); fs_devices->open_devices--; } if (device->writeable) { @@ -638,7 +638,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, error_brelse: brelse(bh); error_close: - close_bdev_exclusive(bdev, flags); + blkdev_put(bdev, flags | FMODE_EXCL); error: continue; } @@ -716,7 +716,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, brelse(bh); error_close: - close_bdev_exclusive(bdev, flags); + blkdev_put(bdev, flags | FMODE_EXCL); error: mutex_unlock(&uuid_mutex); return ret; @@ -1244,7 +1244,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->latest_bdev = next_device->bdev; if (device->bdev) { - close_bdev_exclusive(device->bdev, device->mode); + blkdev_put(device->bdev, device->mode | FMODE_EXCL); device->bdev = NULL; device->fs_devices->open_devices--; } @@ -1287,7 +1287,7 @@ error_brelse: brelse(bh); error_close: if (bdev) - close_bdev_exclusive(bdev, FMODE_READ); + blkdev_put(bdev, FMODE_READ | FMODE_EXCL); out: mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&uuid_mutex); @@ -1565,7 +1565,7 @@ out: mutex_unlock(&root->fs_info->volume_mutex); return ret; error: - close_bdev_exclusive(bdev, 0); + blkdev_put(bdev, FMODE_EXCL); if (seeding_dev) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 2fedaf8b501..23e7513dba9 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -347,7 +347,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto fail; return bdev; @@ -364,8 +364,7 @@ fail: */ static int ext3_blkdev_put(struct block_device *bdev) { - bd_release(bdev); - return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } static int ext3_blkdev_remove(struct ext3_sb_info *sbi) @@ -2136,13 +2135,6 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, if (bdev == NULL) return NULL; - if (bd_claim(bdev, sb)) { - ext3_msg(sb, KERN_ERR, - "error: failed to claim external journal device"); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); - return NULL; - } - blocksize = sb->s_blocksize; hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 61182fe6254..5dd0b3e76fa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -647,7 +647,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto fail; return bdev; @@ -663,8 +663,7 @@ fail: */ static int ext4_blkdev_put(struct block_device *bdev) { - bd_release(bdev); - return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } static int ext4_blkdev_remove(struct ext4_sb_info *sbi) @@ -3758,13 +3757,6 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bdev == NULL) return NULL; - if (bd_claim(bdev, sb)) { - ext4_msg(sb, KERN_ERR, - "failed to claim external journal device"); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); - return NULL; - } - blocksize = sb->s_blocksize; hblock = bdev_logical_block_size(bdev); if (blocksize < hblock) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3eb1393f7b8..c1f0763a022 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1298,7 +1298,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, goto error_bdev; if (s->s_root) - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; @@ -1342,7 +1342,7 @@ error_super: deactivate_locked_super(s); return ERR_PTR(error); error_bdev: - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); return ERR_PTR(error); } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index e1b8493b9aa..5a290f22dcc 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1120,16 +1120,13 @@ int lmLogOpen(struct super_block *sb) * file systems to log may have n-to-1 relationship; */ - bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); + bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + log); if (IS_ERR(bdev)) { rc = -PTR_ERR(bdev); goto free; } - if ((rc = bd_claim(bdev, log))) { - goto close; - } - log->bdev = bdev; memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); @@ -1137,7 +1134,7 @@ int lmLogOpen(struct super_block *sb) * initialize log: */ if ((rc = lmLogInit(log))) - goto unclaim; + goto close; list_add(&log->journal_list, &jfs_external_logs); @@ -1163,11 +1160,8 @@ journal_found: list_del(&log->journal_list); lbmLogShutdown(log); - unclaim: - bd_release(bdev); - close: /* close external log device */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1512,8 +1506,7 @@ int lmLogClose(struct super_block *sb) bdev = log->bdev; rc = lmLogShutdown(log); - bd_release(bdev); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); kfree(log); diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 92ca6fbe09b..734b9025858 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -300,7 +300,7 @@ static int bdev_write_sb(struct super_block *sb, struct page *page) static void bdev_put_device(struct logfs_super *s) { - close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } static int bdev_can_write_buf(struct super_block *sb, u64 ofs) @@ -331,7 +331,7 @@ int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type, if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { int mtdnr = MINOR(bdev->bd_dev); - close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); return logfs_get_sb_mtd(p, mtdnr); } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f804d41ec9d..756a6798d7c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1233,7 +1233,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - close_bdev_exclusive(sd.bdev, mode); + blkdev_put(sd.bdev, mode | FMODE_EXCL); return root_dentry; @@ -1242,7 +1242,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, failed: if (!s_new) - close_bdev_exclusive(sd.bdev, mode); + blkdev_put(sd.bdev, mode | FMODE_EXCL); return ERR_PTR(err); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 52c7557f3e2..d0a2721eace 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1674,7 +1674,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, goto out; reg->hr_bdev = I_BDEV(filp->f_mapping->host); - ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); + ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); if (ret) { reg->hr_bdev = NULL; goto out; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 0a8b0ad0c7e..2e6501d034a 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -549,7 +549,7 @@ void register_disk(struct gendisk *disk) goto exit; bdev->bd_invalidated = 1; - err = blkdev_get(bdev, FMODE_READ); + err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; blkdev_put(bdev, FMODE_READ); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 076c8b19468..b488136f5ac 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2552,8 +2552,6 @@ static int release_journal_dev(struct super_block *super, result = 0; if (journal->j_dev_bd != NULL) { - if (journal->j_dev_bd->bd_dev != super->s_dev) - bd_release(journal->j_dev_bd); result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); journal->j_dev_bd = NULL; } @@ -2571,7 +2569,7 @@ static int journal_init_dev(struct super_block *super, { int result; dev_t jdev; - fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; + fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; char b[BDEVNAME_SIZE]; result = 0; @@ -2585,7 +2583,9 @@ static int journal_init_dev(struct super_block *super, /* there is no "jdev" option and journal is on separate device */ if ((!jdev_name || !jdev_name[0])) { - journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); + if (jdev == super->s_dev) + blkdev_mode &= ~FMODE_EXCL; + journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode, journal); journal->j_dev_mode = blkdev_mode; if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); @@ -2594,15 +2594,8 @@ static int journal_init_dev(struct super_block *super, "cannot init journal device '%s': %i", __bdevname(jdev, b), result); return result; - } else if (jdev != super->s_dev) { - result = bd_claim(journal->j_dev_bd, journal); - if (result) { - blkdev_put(journal->j_dev_bd, blkdev_mode); - return result; - } - + } else if (jdev != super->s_dev) set_blocksize(journal->j_dev_bd, super->s_blocksize); - } return 0; } diff --git a/fs/super.c b/fs/super.c index ca696155cd9..22374bf0ba8 100644 --- a/fs/super.c +++ b/fs/super.c @@ -801,13 +801,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, /* * s_umount nests inside bd_mutex during - * __invalidate_device(). close_bdev_exclusive() - * acquires bd_mutex and can't be called under - * s_umount. Drop s_umount temporarily. This is safe - * as we're holding an active reference. + * __invalidate_device(). blkdev_put() acquires + * bd_mutex and can't be called under s_umount. Drop + * s_umount temporarily. This is safe as we're + * holding an active reference. */ up_write(&s->s_umount); - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); down_write(&s->s_umount); } else { char b[BDEVNAME_SIZE]; @@ -831,7 +831,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); error: return ERR_PTR(error); } @@ -862,7 +862,7 @@ void kill_block_super(struct super_block *sb) bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); - close_bdev_exclusive(bdev, mode); + blkdev_put(bdev, mode | FMODE_EXCL); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9f3a78fe6ae..a1a6e5ceea6 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -623,7 +623,7 @@ xfs_blkdev_put( struct block_device *bdev) { if (bdev) - close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 66b7f2c5d7e..1a033e8ebe4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2006,7 +2006,8 @@ extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t, fmode_t); +extern struct block_device *open_by_devnum(dev_t dev, fmode_t mode, + void *holder); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); @@ -2037,22 +2038,16 @@ extern const struct file_operations def_fifo_fops; extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *, fmode_t); -extern int blkdev_put(struct block_device *, fmode_t); -extern int bd_claim(struct block_device *, void *); -extern void bd_release(struct block_device *); +extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); +extern int blkdev_put(struct block_device *bdev, fmode_t mode); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -extern void bd_unlink_disk_holder(struct block_device *bdev); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { return 0; } -static inline void bd_unlink_disk_holder(struct block_device *bdev) -{ -} #endif #endif @@ -2089,7 +2084,6 @@ extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); -extern void close_bdev_exclusive(struct block_device *, fmode_t); extern void blkdev_show(struct seq_file *,off_t); #else diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a0e4a86ccf9..513a77f1a0b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -223,7 +223,7 @@ static int swsusp_swap_check(void) return res; root_swap = res; - res = blkdev_get(hib_resume_bdev, FMODE_WRITE); + res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL); if (res) return res; @@ -907,7 +907,8 @@ int swsusp_check(void) { int error; - hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); + hib_resume_bdev = open_by_devnum(swsusp_resume_device, + FMODE_READ, NULL); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); diff --git a/mm/swapfile.c b/mm/swapfile.c index 67ddaaf98c7..b6adcfbf6f4 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1677,7 +1677,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); set_blocksize(bdev, p->old_block_size); - bd_release(bdev); + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } else { mutex_lock(&inode->i_mutex); inode->i_flags &= ~S_SWAPFILE; @@ -1939,7 +1939,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = -EINVAL; if (S_ISBLK(inode->i_mode)) { bdev = I_BDEV(inode); - error = bd_claim(bdev, sys_swapon); + error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, + sys_swapon); if (error < 0) { bdev = NULL; error = -EINVAL; @@ -2136,7 +2137,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) bad_swap: if (bdev) { set_blocksize(bdev, p->old_block_size); - bd_release(bdev); + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } destroy_swap_extents(p); swap_cgroup_swapoff(type); -- cgit v1.2.3-70-g09d2 From d4d77629953eabd3c14f6fa5746f6b28babfc55f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 13 Nov 2010 11:55:18 +0100 Subject: block: clean up blkdev_get() wrappers and their users After recent blkdev_get() modifications, open_by_devnum() and open_bdev_exclusive() are simple wrappers around blkdev_get(). Replace them with blkdev_get_by_dev() and blkdev_get_by_path(). blkdev_get_by_dev() is identical to open_by_devnum(). blkdev_get_by_path() is slightly different in that it doesn't automatically add %FMODE_EXCL to @mode. All users are converted. Most conversions are mechanical and don't introduce any behavior difference. There are several exceptions. * btrfs now sets FMODE_EXCL in btrfs_device->mode, so there's no reason to OR it explicitly on blkdev_put(). * gfs2, nilfs2 and the generic mount_bdev() now set FMODE_EXCL in sb->s_mode. * With the above changes, sb->s_mode now always should contain FMODE_EXCL. WARN_ON_ONCE() added to kill_block_super() to detect errors. The new blkdev_get_*() functions are with proper docbook comments. While at it, add function description to blkdev_get() too. Signed-off-by: Tejun Heo Cc: Philipp Reisner Cc: Neil Brown Cc: Mike Snitzer Cc: Joern Engel Cc: Chris Mason Cc: Jan Kara Cc: "Theodore Ts'o" Cc: KONISHI Ryusuke Cc: reiserfs-devel@vger.kernel.org Cc: xfs-masters@oss.sgi.com Cc: Alexander Viro --- drivers/block/drbd/drbd_nl.c | 12 ++-- drivers/md/dm-table.c | 2 +- drivers/md/md.c | 4 +- drivers/mtd/devices/block2mtd.c | 4 +- fs/block_dev.c | 139 +++++++++++++++++++++++++++------------- fs/btrfs/volumes.c | 24 ++++--- fs/btrfs/volumes.h | 2 +- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/gfs2/ops_fstype.c | 8 +-- fs/jfs/jfs_logmgr.c | 4 +- fs/logfs/dev_bdev.c | 3 +- fs/nilfs2/super.c | 8 +-- fs/reiserfs/journal.c | 6 +- fs/super.c | 9 +-- fs/xfs/linux-2.6/xfs_super.c | 3 +- include/linux/fs.h | 7 +- kernel/power/swap.c | 4 +- 18 files changed, 149 insertions(+), 94 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fd034609028..650e43ba4f7 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -902,8 +902,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } } - bdev = open_bdev_exclusive(nbc->dc.backing_dev, - FMODE_READ | FMODE_WRITE, mdev); + bdev = blkdev_get_by_path(nbc->dc.backing_dev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, PTR_ERR(bdev)); @@ -920,10 +920,10 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * should check it for you already; but if you don't, or * someone fooled it, we need to double check here) */ - bdev = open_bdev_exclusive(nbc->dc.meta_dev, - FMODE_READ | FMODE_WRITE, - (nbc->dc.meta_dev_idx < 0) ? - (void *)mdev : (void *)drbd_m_holder); + bdev = blkdev_get_by_path(nbc->dc.meta_dev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, + (nbc->dc.meta_dev_idx < 0) ? + (void *)mdev : (void *)drbd_m_holder); if (IS_ERR(bdev)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, PTR_ERR(bdev)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9e88ca0c55e..67150c32986 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -325,7 +325,7 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev, BUG_ON(d->dm_dev.bdev); - bdev = open_by_devnum(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); + bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/drivers/md/md.c b/drivers/md/md.c index 6af951ffe0b..5aaa6bfbe63 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1934,8 +1934,8 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - shared ? (mdk_rdev_t *)lock_rdev : rdev); + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + shared ? (mdk_rdev_t *)lock_rdev : rdev); if (IS_ERR(bdev)) { printk(KERN_ERR "md: could not open %s.\n", __bdevname(dev, b)); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index aa557beb8f5..f29a6f9df6e 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -247,7 +247,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) return NULL; /* Get a handle on the device */ - bdev = open_bdev_exclusive(devname, mode, dev); + bdev = blkdev_get_by_path(devname, mode, dev); #ifndef MODULE if (IS_ERR(bdev)) { @@ -256,7 +256,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) dev_t devt = name_to_dev_t(devname); if (devt) - bdev = open_by_devnum(devt, mode, dev); + bdev = blkdev_get_by_dev(devt, mode, dev); } #endif diff --git a/fs/block_dev.c b/fs/block_dev.c index 606a5259f87..c1c1b8c3fb9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -854,24 +854,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev) { } #endif -/* - * Tries to open block device by device number. Use it ONLY if you - * really do not have anything better - i.e. when you are behind a - * truly sucky interface and all you are given is a device number. _Never_ - * to be used for internal purposes. If you ever need it - reconsider - * your API. - */ -struct block_device *open_by_devnum(dev_t dev, fmode_t mode, void *holder) -{ - struct block_device *bdev = bdget(dev); - int err = -ENOMEM; - if (bdev) - err = blkdev_get(bdev, mode, holder); - return err ? ERR_PTR(err) : bdev; -} - -EXPORT_SYMBOL(open_by_devnum); - /** * flush_disk - invalidates all buffer-cache entries on a disk * @@ -1132,6 +1114,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) return ret; } +/** + * blkdev_get - open a block device + * @bdev: block_device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is + * open with exclusive access. Specifying %FMODE_EXCL with %NULL + * @holder is invalid. Exclusive opens may nest for the same @holder. + * + * On success, the reference count of @bdev is unchanged. On failure, + * @bdev is put. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * 0 on success, -errno on failure. + */ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { struct block_device *whole = NULL; @@ -1186,6 +1187,80 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) } EXPORT_SYMBOL(blkdev_get); +/** + * blkdev_get_by_path - open a block device by name + * @path: path to the block device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open the blockdevice described by the device file at @path. @mode + * and @holder are identical to blkdev_get(). + * + * On success, the returned block_device has reference count of one. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Pointer to block_device on success, ERR_PTR(-errno) on failure. + */ +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder) +{ + struct block_device *bdev; + int err; + + bdev = lookup_bdev(path); + if (IS_ERR(bdev)) + return bdev; + + err = blkdev_get(bdev, mode, holder); + if (err) + return ERR_PTR(err); + + return bdev; +} +EXPORT_SYMBOL(blkdev_get_by_path); + +/** + * blkdev_get_by_dev - open a block device by device number + * @dev: device number of block device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open the blockdevice described by device number @dev. @mode and + * @holder are identical to blkdev_get(). + * + * Use it ONLY if you really do not have anything better - i.e. when + * you are behind a truly sucky interface and all you are given is a + * device number. _Never_ to be used for internal purposes. If you + * ever need it - reconsider your API. + * + * On success, the returned block_device has reference count of one. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Pointer to block_device on success, ERR_PTR(-errno) on failure. + */ +struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) +{ + struct block_device *bdev; + int err; + + bdev = bdget(dev); + if (!bdev) + return ERR_PTR(-ENOMEM); + + err = blkdev_get(bdev, mode, holder); + if (err) + return ERR_PTR(err); + + return bdev; +} +EXPORT_SYMBOL(blkdev_get_by_dev); + static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1436,34 +1511,6 @@ fail: } EXPORT_SYMBOL(lookup_bdev); -/** - * open_bdev_exclusive - open a block device by name and set it up for use - * - * @path: special file representing the block device - * @mode: FMODE_... combination to pass be used - * @holder: owner for exclusion - * - * Open the blockdevice described by the special file at @path, claim it - * for the @holder. - */ -struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) -{ - struct block_device *bdev; - int error; - - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) - return bdev; - - error = blkdev_get(bdev, mode | FMODE_EXCL, holder); - if (error) - return ERR_PTR(error); - - return bdev; -} - -EXPORT_SYMBOL(open_bdev_exclusive); - int __invalidate_device(struct block_device *bdev) { struct super_block *sb = get_super(bdev); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f1b729d3b88..95324e9f928 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -489,7 +489,7 @@ again: continue; if (device->bdev) { - blkdev_put(device->bdev, device->mode | FMODE_EXCL); + blkdev_put(device->bdev, device->mode); device->bdev = NULL; fs_devices->open_devices--; } @@ -523,7 +523,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev) { - blkdev_put(device->bdev, device->mode | FMODE_EXCL); + blkdev_put(device->bdev, device->mode); fs_devices->open_devices--; } if (device->writeable) { @@ -580,13 +580,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int seeding = 1; int ret = 0; + flags |= FMODE_EXCL; + list_for_each_entry(device, head, dev_list) { if (device->bdev) continue; if (!device->name) continue; - bdev = open_bdev_exclusive(device->name, flags, holder); + bdev = blkdev_get_by_path(device->name, flags, holder); if (IS_ERR(bdev)) { printk(KERN_INFO "open %s failed\n", device->name); goto error; @@ -638,7 +640,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, error_brelse: brelse(bh); error_close: - blkdev_put(bdev, flags | FMODE_EXCL); + blkdev_put(bdev, flags); error: continue; } @@ -684,7 +686,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, mutex_lock(&uuid_mutex); - bdev = open_bdev_exclusive(path, flags, holder); + flags |= FMODE_EXCL; + bdev = blkdev_get_by_path(path, flags, holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); @@ -716,7 +719,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, brelse(bh); error_close: - blkdev_put(bdev, flags | FMODE_EXCL); + blkdev_put(bdev, flags); error: mutex_unlock(&uuid_mutex); return ret; @@ -1179,8 +1182,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) goto out; } } else { - bdev = open_bdev_exclusive(device_path, FMODE_READ, - root->fs_info->bdev_holder); + bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, + root->fs_info->bdev_holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); goto out; @@ -1244,7 +1247,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->latest_bdev = next_device->bdev; if (device->bdev) { - blkdev_put(device->bdev, device->mode | FMODE_EXCL); + blkdev_put(device->bdev, device->mode); device->bdev = NULL; device->fs_devices->open_devices--; } @@ -1439,7 +1442,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) return -EINVAL; - bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder); + bdev = blkdev_get_by_path(device_path, FMODE_EXCL, + root->fs_info->bdev_holder); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2b638b6e4ee..856e7577030 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -49,7 +49,7 @@ struct btrfs_device { struct block_device *bdev; - /* the mode sent to open_bdev_exclusive */ + /* the mode sent to blkdev_get */ fmode_t mode; char *name; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 23e7513dba9..123720ba786 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -347,7 +347,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto fail; return bdev; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5dd0b3e76fa..bd63e692721 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -647,7 +647,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) struct block_device *bdev; char b[BDEVNAME_SIZE]; - bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); + bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) goto fail; return bdev; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c1f0763a022..bc56ccf98ff 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1268,7 +1268,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ | FMODE_EXCL; int error; struct gfs2_args args; struct gfs2_sbd *sdp; @@ -1276,7 +1276,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; - bdev = open_bdev_exclusive(dev_name, mode, fs_type); + bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(bdev)) return ERR_CAST(bdev); @@ -1298,7 +1298,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, goto error_bdev; if (s->s_root) - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; @@ -1342,7 +1342,7 @@ error_super: deactivate_locked_super(s); return ERR_PTR(error); error_bdev: - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); return ERR_PTR(error); } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 5a290f22dcc..278e3fb40b7 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1120,8 +1120,8 @@ int lmLogOpen(struct super_block *sb) * file systems to log may have n-to-1 relationship; */ - bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - log); + bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + log); if (IS_ERR(bdev)) { rc = -PTR_ERR(bdev); goto free; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 734b9025858..723bc5bca09 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -325,7 +325,8 @@ int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type, { struct block_device *bdev; - bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type); + bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + type); if (IS_ERR(bdev)) return PTR_ERR(bdev); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 756a6798d7c..0030640e2d7 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1147,14 +1147,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags, { struct nilfs_super_data sd; struct super_block *s; - fmode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ | FMODE_EXCL; struct dentry *root_dentry; int err, s_new = false; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; - sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); + sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(sd.bdev)) return ERR_CAST(sd.bdev); @@ -1233,7 +1233,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - blkdev_put(sd.bdev, mode | FMODE_EXCL); + blkdev_put(sd.bdev, mode); return root_dentry; @@ -1242,7 +1242,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, failed: if (!s_new) - blkdev_put(sd.bdev, mode | FMODE_EXCL); + blkdev_put(sd.bdev, mode); return ERR_PTR(err); } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index b488136f5ac..e2fce519c0f 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2585,7 +2585,8 @@ static int journal_init_dev(struct super_block *super, if ((!jdev_name || !jdev_name[0])) { if (jdev == super->s_dev) blkdev_mode &= ~FMODE_EXCL; - journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode, journal); + journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, + journal); journal->j_dev_mode = blkdev_mode; if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); @@ -2601,8 +2602,7 @@ static int journal_init_dev(struct super_block *super, } journal->j_dev_mode = blkdev_mode; - journal->j_dev_bd = open_bdev_exclusive(jdev_name, - blkdev_mode, journal); + journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/fs/super.c b/fs/super.c index 22374bf0ba8..5d9a4497849 100644 --- a/fs/super.c +++ b/fs/super.c @@ -766,13 +766,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ; + fmode_t mode = FMODE_READ | FMODE_EXCL; int error = 0; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; - bdev = open_bdev_exclusive(dev_name, mode, fs_type); + bdev = blkdev_get_by_path(dev_name, mode, fs_type); if (IS_ERR(bdev)) return ERR_CAST(bdev); @@ -807,7 +807,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, * holding an active reference. */ up_write(&s->s_umount); - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); down_write(&s->s_umount); } else { char b[BDEVNAME_SIZE]; @@ -831,7 +831,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, mode); error: return ERR_PTR(error); } @@ -862,6 +862,7 @@ void kill_block_super(struct super_block *sb) bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); + WARN_ON_ONCE(!(mode & FMODE_EXCL)); blkdev_put(bdev, mode | FMODE_EXCL); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a1a6e5ceea6..9209cd199c4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -609,7 +609,8 @@ xfs_blkdev_get( { int error = 0; - *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); + *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); printk("XFS: Invalid device [%s], error=%d\n", name, error); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a033e8ebe4..f4850156391 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2006,8 +2006,6 @@ extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t dev, fmode_t mode, - void *holder); extern void invalidate_bdev(struct block_device *); extern int sync_blockdev(struct block_device *bdev); extern struct super_block *freeze_bdev(struct block_device *); @@ -2039,6 +2037,10 @@ extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); +extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder); +extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, + void *holder); extern int blkdev_put(struct block_device *bdev, fmode_t mode); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); @@ -2083,7 +2085,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name) extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); -extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *); extern void blkdev_show(struct seq_file *,off_t); #else diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 513a77f1a0b..b019609d1b4 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -907,8 +907,8 @@ int swsusp_check(void) { int error; - hib_resume_bdev = open_by_devnum(swsusp_resume_device, - FMODE_READ, NULL); + hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, + FMODE_READ, NULL); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); -- cgit v1.2.3-70-g09d2 From e692cb668fdd5a712c6ed2a2d6f2a36ee83997b4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Dec 2010 19:41:49 +0100 Subject: block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead When stacking devices, a request_queue is not always available. This forced us to have a no_cluster flag in the queue_limits that could be used as a carrier until the request_queue had been set up for a metadevice. There were several problems with that approach. First of all it was up to the stacking device to remember to set queue flag after stacking had completed. Also, the queue flag and the queue limits had to be kept in sync at all times. We got that wrong, which could lead to us issuing commands that went beyond the max scatterlist limit set by the driver. The proper fix is to avoid having two flags for tracking the same thing. We deprecate QUEUE_FLAG_CLUSTER and use the queue limit directly in the block layer merging functions. The queue_limit 'no_cluster' is turned into 'cluster' to avoid double negatives and to ease stacking. Clustering defaults to being enabled as before. The queue flag logic is removed from the stacking function, and explicitly setting the cluster flag is no longer necessary in DM and MD. Reported-by: Ed Lin Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-merge.c | 6 +++--- block/blk-settings.c | 25 ++----------------------- block/blk-sysfs.c | 2 +- drivers/md/dm-table.c | 5 ----- drivers/md/md.c | 3 --- drivers/scsi/scsi_lib.c | 3 +-- include/linux/blkdev.h | 9 ++++++--- 7 files changed, 13 insertions(+), 40 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/block/blk-merge.c b/block/blk-merge.c index 77b7c26df6b..74bc4a768f3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -21,7 +21,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, return 0; fbio = bio; - cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + cluster = blk_queue_cluster(q); seg_size = 0; nr_phys_segs = 0; for_each_bio(bio) { @@ -87,7 +87,7 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { - if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) + if (!blk_queue_cluster(q)) return 0; if (bio->bi_seg_back_size + nxt->bi_seg_front_size > @@ -123,7 +123,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, int nsegs, cluster; nsegs = 0; - cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + cluster = blk_queue_cluster(q); /* * for each bio in rq diff --git a/block/blk-settings.c b/block/blk-settings.c index 701859fb964..e55f5fc4ca2 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -126,7 +126,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->alignment_offset = 0; lim->io_opt = 0; lim->misaligned = 0; - lim->no_cluster = 0; + lim->cluster = 1; } EXPORT_SYMBOL(blk_set_default_limits); @@ -464,15 +464,6 @@ EXPORT_SYMBOL(blk_queue_io_opt); void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) { blk_stack_limits(&t->limits, &b->limits, 0); - - if (!t->queue_lock) - WARN_ON_ONCE(1); - else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { - unsigned long flags; - spin_lock_irqsave(t->queue_lock, flags); - queue_flag_clear(QUEUE_FLAG_CLUSTER, t); - spin_unlock_irqrestore(t->queue_lock, flags); - } } EXPORT_SYMBOL(blk_queue_stack_limits); @@ -545,7 +536,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->io_opt = lcm(t->io_opt, b->io_opt); - t->no_cluster |= b->no_cluster; + t->cluster &= b->cluster; t->discard_zeroes_data &= b->discard_zeroes_data; /* Physical block size a multiple of the logical block size? */ @@ -641,7 +632,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset) { struct request_queue *t = disk->queue; - struct request_queue *b = bdev_get_queue(bdev); if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) { char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE]; @@ -652,17 +642,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n", top, bottom); } - - if (!t->queue_lock) - WARN_ON_ONCE(1); - else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) { - unsigned long flags; - - spin_lock_irqsave(t->queue_lock, flags); - if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) - queue_flag_clear(QUEUE_FLAG_CLUSTER, t); - spin_unlock_irqrestore(t->queue_lock, flags); - } } EXPORT_SYMBOL(disk_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 013457f47fd..41fb69150b4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -119,7 +119,7 @@ static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char * static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) { - if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) + if (blk_queue_cluster(q)) return queue_var_show(queue_max_segment_size(q), (page)); return queue_var_show(PAGE_CACHE_SIZE, (page)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 90267f8d64e..e2da1912a2c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1131,11 +1131,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, */ q->limits = *limits; - if (limits->no_cluster) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); - else - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); - if (!dm_table_supports_discards(t)) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); else diff --git a/drivers/md/md.c b/drivers/md/md.c index 84c46a16192..52694d29663 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4296,9 +4296,6 @@ static int md_alloc(dev_t dev, char *name) goto abort; mddev->queue->queuedata = mddev; - /* Can be unlocked because the queue is new: no concurrency */ - queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue); - blk_queue_make_request(mddev->queue, md_make_request); disk = alloc_disk(1 << shift); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eafeeda6e19..9d7ba07dc5e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1642,9 +1642,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); - /* New queue, no concurrency on queue_flags */ if (!shost->use_clustering) - queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); + q->limits.cluster = 0; /* * set a reasonable default alignment on word boundaries: the diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aae86fd10c4..95aeeeb49e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -250,7 +250,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; - unsigned char no_cluster; + unsigned char cluster; signed char discard_zeroes_data; }; @@ -380,7 +380,6 @@ struct request_queue #endif }; -#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ @@ -403,7 +402,6 @@ struct request_queue #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_CLUSTER) | \ (1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_ADD_RANDOM)) @@ -510,6 +508,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define rq_data_dir(rq) ((rq)->cmd_flags & 1) +static inline unsigned int blk_queue_cluster(struct request_queue *q) +{ + return q->limits.cluster; +} + /* * We regard a request as sync, if either a read or a sync write */ -- cgit v1.2.3-70-g09d2 From 72d4cd9f38b5ed96b75df4c622be25e1c2648dd3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 17 Dec 2010 08:34:20 +0100 Subject: block: max hardware sectors limit wrapper Implement blk_limits_max_hw_sectors() and make blk_queue_max_hw_sectors() a wrapper around it. DM needs this to avoid setting queue_limits' max_hw_sectors and max_sectors directly. dm_set_device_limits() now leverages blk_limits_max_hw_sectors() logic to establish the appropriate max_hw_sectors minimum (PAGE_SIZE). Fixes issue where DM was incorrectly setting max_sectors rather than max_hw_sectors (which caused dm_merge_bvec()'s max_hw_sectors check to be ineffective). Signed-off-by: Mike Snitzer Cc: stable@kernel.org Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 26 ++++++++++++++++++++------ drivers/md/dm-table.c | 5 ++--- include/linux/blkdev.h | 1 + 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index e55f5fc4ca2..36c8c1f2af1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -229,8 +229,8 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) EXPORT_SYMBOL(blk_queue_bounce_limit); /** - * blk_queue_max_hw_sectors - set max sectors for a request for this queue - * @q: the request queue for the device + * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request + * @limits: the queue limits * @max_hw_sectors: max hardware sectors in the usual 512b unit * * Description: @@ -244,7 +244,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); * per-device basis in /sys/block//queue/max_sectors_kb. * The soft limit can not exceed max_hw_sectors. **/ -void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) +void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors) { if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); @@ -252,9 +252,23 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto __func__, max_hw_sectors); } - q->limits.max_hw_sectors = max_hw_sectors; - q->limits.max_sectors = min_t(unsigned int, max_hw_sectors, - BLK_DEF_MAX_SECTORS); + limits->max_hw_sectors = max_hw_sectors; + limits->max_sectors = min_t(unsigned int, max_hw_sectors, + BLK_DEF_MAX_SECTORS); +} +EXPORT_SYMBOL(blk_limits_max_hw_sectors); + +/** + * blk_queue_max_hw_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_hw_sectors: max hardware sectors in the usual 512b unit + * + * Description: + * See description for blk_limits_max_hw_sectors(). + **/ +void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) +{ + blk_limits_max_hw_sectors(&q->limits, max_hw_sectors); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e2da1912a2c..4d705cea0f8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -517,9 +517,8 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, */ if (q->merge_bvec_fn && !ti->type->merge) - limits->max_sectors = - min_not_zero(limits->max_sectors, - (unsigned int) (PAGE_SIZE >> 9)); + blk_limits_max_hw_sectors(limits, + (unsigned int) (PAGE_SIZE >> 9)); return 0; } EXPORT_SYMBOL_GPL(dm_set_device_limits); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95aeeeb49e8..36ab42c9bb9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -808,6 +808,7 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *, extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); +extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); -- cgit v1.2.3-70-g09d2 From 9d357b0787bb3c91835d5e658c3bda178f9ca419 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jan 2011 20:00:01 +0000 Subject: dm: introduce target callbacks and congestion callback DM currently implements congestion checking by checking on congestion in each component device. For raid456 we need to also check if the stripe cache is congested. Add per-target congestion checker callback support. Extending the target_callbacks structure with additional callback functions allows for establishing multiple callbacks per-target (a callback is also needed for unplug). Cc: linux-raid@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Jonathan Brassow Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 14 ++++++++++++++ include/linux/device-mapper.h | 11 +++++++++++ 2 files changed, 25 insertions(+) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 985c20a4f30..7e2ec3c0555 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -71,6 +71,8 @@ struct dm_table { void *event_context; struct dm_md_mempools *mempools; + + struct list_head target_callbacks; }; /* @@ -204,6 +206,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, return -ENOMEM; INIT_LIST_HEAD(&t->devices); + INIT_LIST_HEAD(&t->target_callbacks); atomic_set(&t->holders, 0); t->discards_supported = 1; @@ -1225,10 +1228,17 @@ int dm_table_resume_targets(struct dm_table *t) return 0; } +void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb) +{ + list_add(&cb->list, &t->target_callbacks); +} +EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks); + int dm_table_any_congested(struct dm_table *t, int bdi_bits) { struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); + struct dm_target_callbacks *cb; int r = 0; list_for_each_entry(dd, devices, list) { @@ -1243,6 +1253,10 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) bdevname(dd->dm_dev.bdev, b)); } + list_for_each_entry(cb, &t->target_callbacks, list) + if (cb->congested_fn) + r |= cb->congested_fn(cb, bdi_bits); + return r; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 2970022faa6..4b1c63d478a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -193,6 +193,12 @@ struct dm_target { char *error; }; +/* Each target can link one of these into the table */ +struct dm_target_callbacks { + struct list_head list; + int (*congested_fn) (struct dm_target_callbacks *, int); +}; + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); @@ -268,6 +274,11 @@ int dm_table_create(struct dm_table **result, fmode_t mode, int dm_table_add_target(struct dm_table *t, const char *type, sector_t start, sector_t len, char *params); +/* + * Target_ctr should call this if it needs to add any callbacks. + */ +void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb); + /* * Finally call this to make the table ready for use. */ -- cgit v1.2.3-70-g09d2 From 99d03c141b40914b67d63c9d23b8da4386422ed7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Jan 2011 20:00:02 +0000 Subject: dm: per target unplug callback support Add per-target unplug callback support. Cc: linux-raid@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Jonathan Brassow Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 5 +++++ include/linux/device-mapper.h | 1 + 2 files changed, 6 insertions(+) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7e2ec3c0555..dffa0ac7c4f 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1278,6 +1278,7 @@ void dm_table_unplug_all(struct dm_table *t) { struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); + struct dm_target_callbacks *cb; list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); @@ -1290,6 +1291,10 @@ void dm_table_unplug_all(struct dm_table *t) dm_device_name(t->md), bdevname(dd->dm_dev.bdev, b)); } + + list_for_each_entry(cb, &t->target_callbacks, list) + if (cb->unplug_fn) + cb->unplug_fn(cb); } struct mapped_device *dm_table_get_md(struct dm_table *t) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4b1c63d478a..272496d1fae 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -197,6 +197,7 @@ struct dm_target { struct dm_target_callbacks { struct list_head list; int (*congested_fn) (struct dm_target_callbacks *, int); + void (*unplug_fn)(struct dm_target_callbacks *); }; int dm_register_target(struct target_type *t); -- cgit v1.2.3-70-g09d2 From 49731baa41df404c2c3f44555869ab387363af43 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Jan 2011 18:43:57 +0100 Subject: block: restore multiple bd_link_disk_holder() support Commit e09b457b (block: simplify holder symlink handling) incorrectly assumed that there is only one link at maximum. dm may use multiple links and expects block layer to track reference count for each link, which is different from and unrelated to the exclusive device holder identified by @holder when the device is opened. Remove the single holder assumption and automatic removal of the link and revive the per-link reference count tracking. The code essentially behaves the same as before commit e09b457b sans the unnecessary kobject reference count dancing. While at it, note that this facility should not be used by anyone else than the current ones. Sysfs symlinks shouldn't be abused like this and the whole thing doesn't belong in the block layer at all. Signed-off-by: Tejun Heo Reported-by: Milan Broz Cc: Jun'ichi Nomura Cc: Neil Brown Cc: linux-raid@vger.kernel.org Cc: Kay Sievers Signed-off-by: Jens Axboe --- drivers/md/dm-table.c | 1 + drivers/md/md.c | 1 + fs/block_dev.c | 93 +++++++++++++++++++++++++++++++++++++++++---------- include/linux/fs.h | 8 ++++- 4 files changed, 84 insertions(+), 19 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index dffa0ac7c4f..38e4eb1bb96 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -350,6 +350,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) if (!d->dm_dev.bdev) return; + bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md)); blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); d->dm_dev.bdev = NULL; } diff --git a/drivers/md/md.c b/drivers/md/md.c index cf8594c5ea2..b76cfc89e1b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1912,6 +1912,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; diff --git a/fs/block_dev.c b/fs/block_dev.c index fe3f59c14a0..333a7bb4cb9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -432,6 +432,9 @@ static void init_once(void *foo) mutex_init(&bdev->bd_mutex); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); +#ifdef CONFIG_SYSFS + INIT_LIST_HEAD(&bdev->bd_holder_disks); +#endif inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. */ mutex_init(&bdev->bd_fsfreeze_mutex); @@ -779,6 +782,23 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, } #ifdef CONFIG_SYSFS +struct bd_holder_disk { + struct list_head list; + struct gendisk *disk; + int refcnt; +}; + +static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, + struct gendisk *disk) +{ + struct bd_holder_disk *holder; + + list_for_each_entry(holder, &bdev->bd_holder_disks, list) + if (holder->disk == disk) + return holder; + return NULL; +} + static int add_symlink(struct kobject *from, struct kobject *to) { return sysfs_create_link(from, to, kobject_name(to)); @@ -794,6 +814,8 @@ static void del_symlink(struct kobject *from, struct kobject *to) * @bdev: the claimed slave bdev * @disk: the holding disk * + * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. + * * This functions creates the following sysfs symlinks. * * - from "slaves" directory of the holder @disk to the claimed @bdev @@ -817,47 +839,83 @@ static void del_symlink(struct kobject *from, struct kobject *to) */ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { + struct bd_holder_disk *holder; int ret = 0; mutex_lock(&bdev->bd_mutex); - WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk); + WARN_ON_ONCE(!bdev->bd_holder); /* FIXME: remove the following once add_disk() handles errors */ if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) goto out_unlock; - ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); - if (ret) + holder = bd_find_holder_disk(bdev, disk); + if (holder) { + holder->refcnt++; goto out_unlock; + } - ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); - if (ret) { - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + holder = kzalloc(sizeof(*holder), GFP_KERNEL); + if (!holder) { + ret = -ENOMEM; goto out_unlock; } - bdev->bd_holder_disk = disk; + INIT_LIST_HEAD(&holder->list); + holder->disk = disk; + holder->refcnt = 1; + + ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + if (ret) + goto out_free; + + ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + if (ret) + goto out_del; + + list_add(&holder->list, &bdev->bd_holder_disks); + goto out_unlock; + +out_del: + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); +out_free: + kfree(holder); out_unlock: mutex_unlock(&bdev->bd_mutex); return ret; } EXPORT_SYMBOL_GPL(bd_link_disk_holder); -static void bd_unlink_disk_holder(struct block_device *bdev) +/** + * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder() + * @bdev: the calimed slave bdev + * @disk: the holding disk + * + * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. + * + * CONTEXT: + * Might sleep. + */ +void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { - struct gendisk *disk = bdev->bd_holder_disk; + struct bd_holder_disk *holder; - bdev->bd_holder_disk = NULL; - if (!disk) - return; + mutex_lock(&bdev->bd_mutex); - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); - del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + holder = bd_find_holder_disk(bdev, disk); + + if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { + del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(bdev->bd_part->holder_dir, + &disk_to_dev(disk)->kobj); + list_del_init(&holder->list); + kfree(holder); + } + + mutex_unlock(&bdev->bd_mutex); } -#else -static inline void bd_unlink_disk_holder(struct block_device *bdev) -{ } +EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif /** @@ -1380,7 +1438,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) * unblock evpoll if it was a write holder. */ if (bdev_free) { - bd_unlink_disk_holder(bdev); if (bdev->bd_write_holder) { disk_unblock_events(bdev->bd_disk); bdev->bd_write_holder = false; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3984f2358d1..fb2190349cd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -666,7 +666,7 @@ struct block_device { int bd_holders; bool bd_write_holder; #ifdef CONFIG_SYSFS - struct gendisk * bd_holder_disk; /* for sysfs slave linkng */ + struct list_head bd_holder_disks; #endif struct block_device * bd_contains; unsigned bd_block_size; @@ -2058,12 +2058,18 @@ extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, extern int blkdev_put(struct block_device *bdev, fmode_t mode); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +extern void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { return 0; } +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} #endif #endif -- cgit v1.2.3-70-g09d2