From eb60fa1066622ddb2278732cf61e0c4544e82c6f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 15:28:59 +0900 Subject: block: fix add_partition() error path Partition stats structure was not freed on devt allocation failure path. Fix it. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/partitions/check.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 633f7a0ebb2..90bcf136a9d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -395,7 +395,7 @@ int add_partition(struct gendisk *disk, int partno, err = blk_alloc_devt(p, &devt); if (err) - goto out_free; + goto out_free_stats; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ @@ -426,6 +426,8 @@ int add_partition(struct gendisk *disk, int partno, return 0; +out_free_stats: + free_part_stats(p); out_free: kfree(p); return err; -- cgit v1.2.3-70-g09d2 From ba32929a91fe2c0628f5be62d1597b379c8d3062 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 15:29:58 +0900 Subject: block: make add_partition() return pointer to hd_struct Make add_partition() return pointer to the new hd_struct on success and ERR_PTR() value on failure. This change will be used to fix md autodetection bug. Signed-off-by: Tejun Heo Cc: Neil Brown Signed-off-by: Jens Axboe --- block/ioctl.c | 7 +++---- fs/partitions/check.c | 25 +++++++++++++------------ include/linux/genhd.h | 4 +++- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index c832d639b6e..d03985b04d6 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -18,7 +18,6 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user struct disk_part_iter piter; long long start, length; int partno; - int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -61,10 +60,10 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user disk_part_iter_exit(&piter); /* all seems OK */ - err = add_partition(disk, partno, start, length, - ADDPART_FLAG_NONE); + part = add_partition(disk, partno, start, length, + ADDPART_FLAG_NONE); mutex_unlock(&bdev->bd_mutex); - return err; + return IS_ERR(part) ? PTR_ERR(part) : 0; case BLKPG_DEL_PARTITION: part = disk_get_part(disk, partno); if (!part) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 90bcf136a9d..63302534023 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -348,8 +348,8 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); -int add_partition(struct gendisk *disk, int partno, - sector_t start, sector_t len, int flags) +struct hd_struct *add_partition(struct gendisk *disk, int partno, + sector_t start, sector_t len, int flags) { struct hd_struct *p; dev_t devt = MKDEV(0, 0); @@ -361,15 +361,15 @@ int add_partition(struct gendisk *disk, int partno, err = disk_expand_part_tbl(disk, partno); if (err) - return err; + return ERR_PTR(err); ptbl = disk->part_tbl; if (ptbl->part[partno]) - return -EBUSY; + return ERR_PTR(-EBUSY); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) - return -ENOMEM; + return ERR_PTR(-EBUSY); if (!init_part_stats(p)) { err = -ENOMEM; @@ -424,20 +424,20 @@ int add_partition(struct gendisk *disk, int partno, if (!ddev->uevent_suppress) kobject_uevent(&pdev->kobj, KOBJ_ADD); - return 0; + return p; out_free_stats: free_part_stats(p); out_free: kfree(p); - return err; + return ERR_PTR(err); out_del: kobject_put(p->holder_dir); device_del(pdev); out_put: put_device(pdev); blk_free_devt(devt); - return err; + return ERR_PTR(err); } /* Not exported, helper to add_disk(). */ @@ -568,10 +568,11 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) disk->disk_name, p, (unsigned long long) size); size = get_capacity(disk) - from; } - res = add_partition(disk, p, from, size, state->parts[p].flags); - if (res) { - printk(KERN_ERR " %s: p%d could not be added: %d\n", - disk->disk_name, p, -res); + part = add_partition(disk, p, from, size, + state->parts[p].flags); + if (IS_ERR(part)) { + printk(KERN_ERR " %s: p%d could not be added: %ld\n", + disk->disk_name, p, -PTR_ERR(part)); continue; } #ifdef CONFIG_BLK_DEV_MD diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e439e6aed83..3df7742ce24 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -522,7 +522,9 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); -extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); +extern struct hd_struct * __must_check add_partition(struct gendisk *disk, + int partno, sector_t start, + sector_t len, int flags); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); -- cgit v1.2.3-70-g09d2 From 55e8e30c382d25c34f8aafcc78efec948571a941 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 15:30:47 +0900 Subject: block/md: fix md autodetection Block ext devt conversion missed md_autodetect_dev() call in rescan_partitions() leaving md autodetect unable to see partitions. Fix it. Signed-off-by: Tejun Heo Cc: Neil Brown Signed-off-by: Jens Axboe --- fs/partitions/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 63302534023..6d5b213b8a9 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -577,7 +577,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) } #ifdef CONFIG_BLK_DEV_MD if (state->parts[p].flags & ADDPART_FLAG_RAID) - md_autodetect_dev(bdev->bd_dev+p); + md_autodetect_dev(part_to_dev(part)->devt); #endif } kfree(state); -- cgit v1.2.3-70-g09d2 From 561ec68e4de7947167937c49c451728e6b19e63b Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Fri, 14 Nov 2008 08:26:30 +0100 Subject: block: fix boot failure with CONFIG_DEBUG_BLOCK_EXT_DEVT=y and nash We run into system boot failure with kernel 2.6.28-rc. We found it on a couple of machines, including T61 notebook, nehalem machine, and another HPC NX6325 notebook. All the machines use FedoraCore 8 or FedoraCore 9. With kernel prior to 2.6.28-rc, system boot doesn't fail. I debug it and locate the root cause. Pls. see http://bugzilla.kernel.org/show_bug.cgi?id=11899 https://bugzilla.redhat.com/show_bug.cgi?id=471517 As a matter of fact, there are 2 bugs. 1)root=/dev/sda1, system boot randomly fails. Mostly, boot for 5 times and fails once. nash has a bug. Some of its functions misuse return value 0. Sometimes, 0 means timeout and no uevent available. Sometimes, 0 means nash gets an uevent, but the uevent isn't block-related (for exmaple, usb). If by coincidence, kernel tells nash that uevents are available, but kernel also set timeout, nash might stops collecting other uevents in queue if current uevent isn't block-related. I work out a patch for nash to fix it. http://bugzilla.kernel.org/attachment.cgi?id=18858 2) root=LABEL=/, system always can't boot. initrd init reports switchroot fails. Here is an executation branch of nash when booting: (1) nash read /sys/block/sda/dev; Assume major is 8 (on my desktop) (2) nash query /proc/devices with the major number; It found line "8 sd"; (3) nash use 'sd' to search its own probe table to find device (DISK) type for the device and add it to its own list; (4) Later on, it probes all devices in its list to get filesystem labels; scsi register "8 sd" always. When major is 259, nash fails to find the device(DISK) type. I enables CONFIG_DEBUG_BLOCK_EXT_DEVT=y when compiling kernel, so 259 is picked up for device /dev/sda1, which causes nash to fail to find device (DISK) type. To fixing issue 2), I create a patch for nash and another patch for kernel. http://bugzilla.kernel.org/attachment.cgi?id=18859 http://bugzilla.kernel.org/attachment.cgi?id=18837 Below is the patch for kernel 2.6.28-rc4. It registers blkext, a new block device in proc/devices. With 2 patches on nash and 1 patch on kernel, I boot my machines for dozens of times without failure. Signed-off-by Zhang Yanmin Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index 4e5e7493f67..27549e470da 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -768,6 +768,8 @@ static int __init genhd_device_init(void) bdev_map = kobj_map_init(base_probe, &block_class_lock); blk_dev_init(); + register_blkdev(BLOCK_EXT_MAJOR, "blkext"); + #ifndef CONFIG_SYSFS_DEPRECATED /* create top-level block dir */ block_depr = kobject_create_and_add("block", NULL); -- cgit v1.2.3-70-g09d2 From 68aee07f9bad2c830a898cf6d6bfc11ea24efc40 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 14 Nov 2008 09:44:33 +0100 Subject: Release old elevator on change elevator We should release old elevator when change to use a new one. Signed-off-by: Zhao Lei Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b220c686089..2d19f0cc47f 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -338,12 +338,18 @@ wait: static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { struct request_queue *rq; + elevator_t *old_e; rq = blk_init_queue(do_blkif_request, &blkif_io_lock); if (rq == NULL) return -1; - elevator_init(rq, "noop"); + old_e = rq->elevator; + if (IS_ERR_VALUE(elevator_init(rq, "noop"))) + printk(KERN_WARNING + "blkfront: Switch elevator failed, use default\n"); + else + elevator_exit(old_e); /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_hardsect_size(rq, sector_size); -- cgit v1.2.3-70-g09d2 From 98ba4031ab2adc8b394295e68aa4c8fe9d5060db Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 14 Nov 2008 10:44:59 +0100 Subject: relay: fix cpu offline problem relay_open() will close allocated buffers when failed. but if cpu offlined, some buffer will not be closed. this patch fixed it. and did cleanup for relay_reset() too. Signed-off-by: Lai Jiangshan Signed-off-by: Jens Axboe --- kernel/relay.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/relay.c b/kernel/relay.c index 8d13a7855c0..32b0befdcb6 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -400,7 +400,7 @@ void relay_reset(struct rchan *chan) } mutex_lock(&relay_channels_mutex); - for_each_online_cpu(i) + for_each_possible_cpu(i) if (chan->buf[i]) __relay_reset(chan->buf[i], 0); mutex_unlock(&relay_channels_mutex); @@ -611,10 +611,9 @@ struct rchan *relay_open(const char *base_filename, return chan; free_bufs: - for_each_online_cpu(i) { - if (!chan->buf[i]) - break; - relay_close_buf(chan->buf[i]); + for_each_possible_cpu(i) { + if (chan->buf[i]) + relay_close_buf(chan->buf[i]); } kref_put(&chan->kref, relay_destroy_channel); -- cgit v1.2.3-70-g09d2 From c26156b2534c75bb3cdedf76f6ad1340971cf5bd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 18 Nov 2008 15:07:05 +0100 Subject: block: hold extra reference to bio in blk_rq_map_user_iov() If the size passed in is OK but we end up mapping too many segments, we call the unmap path directly like from IO completion. But from IO completion we have an extra reference to the bio, so this error case goes OOPS when it attempts to free and already free bio. Fix it by getting an extra reference to the bio before calling the unmap failure case. Reported-by: Petr Vandrovec Signed-off-by: Jens Axboe --- block/blk-map.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/blk-map.c b/block/blk-map.c index 4849fa36161..0f4b4b88181 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -217,6 +217,12 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, return PTR_ERR(bio); if (bio->bi_size != len) { + /* + * Grab an extra reference to this bio, as bio_unmap_user() + * expects to be able to drop it twice as it happens on the + * normal IO completion path + */ + bio_get(bio); bio_endio(bio, 0); bio_unmap_user(bio); return -EINVAL; -- cgit v1.2.3-70-g09d2