diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-11-26 08:22:50 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-11-26 08:22:50 +0100 |
commit | 7fbb8759eff9a348efa5f352ffaa51c364837c4b (patch) | |
tree | d40cd3f47b9f667ba94d9613270132080dcb6a1a /drivers/md/md.c | |
parent | 6003ab0bad4cc56f3c4fadf62a0d23a967b9c53b (diff) | |
parent | 13d428afc007fcfcd6deeb215618f54cf9c0cae6 (diff) |
Merge commit 'v2.6.28-rc6' into core/debug
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 181 |
1 files changed, 90 insertions, 91 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 4790c83d78d..1b1d32694f6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -32,31 +32,21 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/module.h> -#include <linux/kernel.h> #include <linux/kthread.h> -#include <linux/linkage.h> #include <linux/raid/md.h> #include <linux/raid/bitmap.h> #include <linux/sysctl.h> #include <linux/buffer_head.h> /* for invalidate_bdev */ #include <linux/poll.h> -#include <linux/mutex.h> #include <linux/ctype.h> -#include <linux/freezer.h> - -#include <linux/init.h> - +#include <linux/hdreg.h> +#include <linux/proc_fs.h> +#include <linux/random.h> +#include <linux/reboot.h> #include <linux/file.h> - -#ifdef CONFIG_KMOD -#include <linux/kmod.h> -#endif - -#include <asm/unaligned.h> +#include <linux/delay.h> #define MAJOR_NR MD_MAJOR -#define MD_DRIVER /* 63 partitions with the alternate major number (mdp) */ #define MdpMinorShift 6 @@ -66,7 +56,7 @@ #ifndef MODULE -static void autostart_arrays (int part); +static void autostart_arrays(int part); #endif static LIST_HEAD(pers_list); @@ -212,7 +202,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); ) -static int md_fail_request (struct request_queue *q, struct bio *bio) +static int md_fail_request(struct request_queue *q, struct bio *bio) { bio_io_error(bio); return 0; @@ -232,6 +222,9 @@ static void mddev_put(mddev_t *mddev) list_del(&mddev->all_mddevs); spin_unlock(&all_mddevs_lock); blk_cleanup_queue(mddev->queue); + if (mddev->sysfs_state) + sysfs_put(mddev->sysfs_state); + mddev->sysfs_state = NULL; kobject_put(&mddev->kobj); } else spin_unlock(&all_mddevs_lock); @@ -1464,14 +1457,13 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - if (rdev->bdev->bd_part) - ko = &rdev->bdev->bd_part->dev.kobj; - else - ko = &rdev->bdev->bd_disk->dev.kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; } + rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state"); + list_add_rcu(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); return 0; @@ -1501,7 +1493,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); - + sysfs_put(rdev->sysfs_state); + rdev->sysfs_state = NULL; /* We need to delay this, otherwise we can deadlock when * writing to 'remove' to "dev/state". We also need * to delay it due to rcu usage. @@ -1533,7 +1526,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) if (err) { printk(KERN_ERR "md: could not bd_claim %s.\n", bdevname(bdev, b)); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return err; } if (!shared) @@ -1549,7 +1542,7 @@ static void unlock_rdev(mdk_rdev_t *rdev) if (!bdev) MD_BUG(); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } void md_autodetect_dev(dev_t dev); @@ -1936,8 +1929,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) err = 0; } - if (!err) - sysfs_notify(&rdev->kobj, NULL, "state"); + if (!err && rdev->sysfs_state) + sysfs_notify_dirent(rdev->sysfs_state); return err ? err : len; } static struct rdev_sysfs_entry rdev_state = @@ -2032,7 +2025,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) rdev->raid_disk = -1; return err; } else - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); sprintf(nm, "rd%d", rdev->raid_disk); if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) printk(KERN_WARNING @@ -2049,7 +2042,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) clear_bit(Faulty, &rdev->flags); clear_bit(WriteMostly, &rdev->flags); set_bit(In_sync, &rdev->flags); - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); } return len; } @@ -2109,8 +2102,6 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) if (strict_strtoull(buf, 10, &size) < 0) return -EINVAL; - if (size < my_mddev->size) - return -EINVAL; if (my_mddev->pers && rdev->raid_disk >= 0) { if (my_mddev->persistent) { size = super_types[my_mddev->major_version]. @@ -2121,9 +2112,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) size = (rdev->bdev->bd_inode->i_size >> 10); size -= rdev->data_offset/2; } - if (size < my_mddev->size) - return -EINVAL; /* component must fit device */ } + if (size < my_mddev->size) + return -EINVAL; /* component must fit device */ rdev->size = size; if (size > oldsize && my_mddev->external) { @@ -2409,12 +2400,11 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) int i; unsigned long msec; char buf[30]; - char *e; + /* remove a period, and count digits after it */ if (len >= sizeof(buf)) return -EINVAL; - strlcpy(buf, cbuf, len); - buf[len] = 0; + strlcpy(buf, cbuf, sizeof(buf)); for (i=0; i<len; i++) { if (dot) { if (isdigit(buf[i])) { @@ -2427,8 +2417,7 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) buf[i] = 0; } } - msec = simple_strtoul(buf, &e, 10); - if (e == buf || (*e && *e != '\n')) + if (strict_strtoul(buf, 10, &msec) < 0) return -EINVAL; msec = (msec * 1000) / scale; if (msec == 0) @@ -2730,9 +2719,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) break; case read_auto: if (mddev->pers) { - if (mddev->ro != 1) + if (mddev->ro == 0) err = do_md_stop(mddev, 1, 0); - else + else if (mddev->ro == 1) err = restart_array(mddev); if (err == 0) { mddev->ro = 2; @@ -2787,7 +2776,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) if (err) return err; else { - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); return len; } } @@ -2948,7 +2937,13 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) { int major, minor; char *e; - if (!list_empty(&mddev->disks)) + /* Changing the details of 'external' metadata is + * always permitted. Otherwise there must be + * no devices attached to the array. + */ + if (mddev->external && strncmp(buf, "external:", 9) == 0) + ; + else if (!list_empty(&mddev->disks)) return -EBUSY; if (cmd_match(buf, "none")) { @@ -3468,16 +3463,23 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->fops = &md_fops; disk->private_data = mddev; disk->queue = mddev->queue; + /* Allow extended partitions. This makes the + * 'mdp' device redundant, but we can really + * remove it now. + */ + disk->flags |= GENHD_FL_EXT_DEVT; add_disk(disk); mddev->gendisk = disk; - error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, - "%s", "md"); + error = kobject_init_and_add(&mddev->kobj, &md_ktype, + &disk_to_dev(disk)->kobj, "%s", "md"); mutex_unlock(&disks_mutex); if (error) printk(KERN_WARNING "md: cannot register %s/md - name in use\n", disk->disk_name); - else + else { kobject_uevent(&mddev->kobj, KOBJ_ADD); + mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); + } return NULL; } @@ -3488,7 +3490,7 @@ static void md_safemode_timeout(unsigned long data) if (!atomic_read(&mddev->writes_pending)) { mddev->safemode = 1; if (mddev->external) - set_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags); + sysfs_notify_dirent(mddev->sysfs_state); } md_wakeup_thread(mddev->thread); } @@ -3530,17 +3532,12 @@ static int do_md_run(mddev_t * mddev) return -EINVAL; } /* - * chunk-size has to be a power of 2 and multiples of PAGE_SIZE + * chunk-size has to be a power of 2 */ if ( (1 << ffz(~chunk_size)) != chunk_size) { printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size); return -EINVAL; } - if (chunk_size < PAGE_SIZE) { - printk(KERN_ERR "too small chunk_size: %d < %ld\n", - chunk_size, PAGE_SIZE); - return -EINVAL; - } /* devices must have minimum size of one chunk */ rdev_for_each(rdev, tmp, mddev) { @@ -3558,12 +3555,10 @@ static int do_md_run(mddev_t * mddev) } } -#ifdef CONFIG_KMOD if (mddev->level != LEVEL_NONE) request_module("md-level-%d", mddev->level); else if (mddev->clevel[0]) request_module("md-%s", mddev->clevel); -#endif /* * Drop all container device buffers, from now on @@ -3596,7 +3591,7 @@ static int do_md_run(mddev_t * mddev) return -EINVAL; } } - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); } md_probe(mddev->unit, NULL, NULL); @@ -3758,10 +3753,10 @@ static int do_md_run(mddev_t * mddev) mddev->changed = 1; md_new_event(mddev); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify(&mddev->kobj, NULL, "degraded"); - kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); return 0; } @@ -3785,7 +3780,7 @@ static int restart_array(mddev_t *mddev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); return 0; } @@ -3865,7 +3860,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) module_put(mddev->pers->owner); mddev->pers = NULL; /* tell userspace to handle 'inactive' */ - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); set_capacity(disk, 0); mddev->changed = 1; @@ -3945,13 +3940,14 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) mddev->degraded = 0; mddev->barriers_work = 0; mddev->safemode = 0; + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); } else if (mddev->pers) printk(KERN_INFO "md: %s switched to read-only mode.\n", mdname(mddev)); err = 0; md_new_event(mddev); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); out: return err; } @@ -3974,10 +3970,10 @@ static void autorun_array(mddev_t *mddev) } printk("\n"); - err = do_md_run (mddev); + err = do_md_run(mddev); if (err) { printk(KERN_WARNING "md: do_md_run() returned %d\n", err); - do_md_stop (mddev, 0, 0); + do_md_stop(mddev, 0, 0); } } @@ -4315,7 +4311,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (err) export_rdev(rdev); else - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); md_update_sb(mddev, 1); if (mddev->degraded) @@ -4336,7 +4332,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (!(info->state & (1<<MD_DISK_FAULTY))) { int err; - rdev = md_import_device (dev, -1, 0); + rdev = md_import_device(dev, -1, 0); if (IS_ERR(rdev)) { printk(KERN_WARNING "md: error, md_import_device() returned %ld\n", @@ -4418,7 +4414,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) return -EINVAL; } - rdev = md_import_device (dev, -1, 0); + rdev = md_import_device(dev, -1, 0); if (IS_ERR(rdev)) { printk(KERN_WARNING "md: error, md_import_device() returned %ld\n", @@ -4803,7 +4799,7 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static int md_ioctl(struct inode *inode, struct file *file, +static int md_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { int err = 0; @@ -4841,7 +4837,7 @@ static int md_ioctl(struct inode *inode, struct file *file, * Commands creating/starting a new array: */ - mddev = inode->i_bdev->bd_disk->private_data; + mddev = bdev->bd_disk->private_data; if (!mddev) { BUG(); @@ -4937,11 +4933,11 @@ static int md_ioctl(struct inode *inode, struct file *file, goto done_unlock; case STOP_ARRAY: - err = do_md_stop (mddev, 0, 1); + err = do_md_stop(mddev, 0, 1); goto done_unlock; case STOP_ARRAY_RO: - err = do_md_stop (mddev, 1, 1); + err = do_md_stop(mddev, 1, 1); goto done_unlock; } @@ -4956,7 +4952,7 @@ static int md_ioctl(struct inode *inode, struct file *file, if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { if (mddev->ro == 2) { mddev->ro = 0; - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); } else { @@ -4990,7 +4986,7 @@ static int md_ioctl(struct inode *inode, struct file *file, goto done_unlock; case RUN_ARRAY: - err = do_md_run (mddev); + err = do_md_run(mddev); goto done_unlock; case SET_BITMAP_FILE: @@ -5014,13 +5010,13 @@ abort: return err; } -static int md_open(struct inode *inode, struct file *file) +static int md_open(struct block_device *bdev, fmode_t mode) { /* * Succeed if we can lock the mddev, which confirms that * it isn't being stopped right now. */ - mddev_t *mddev = inode->i_bdev->bd_disk->private_data; + mddev_t *mddev = bdev->bd_disk->private_data; int err; if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) @@ -5031,14 +5027,14 @@ static int md_open(struct inode *inode, struct file *file) atomic_inc(&mddev->openers); mddev_unlock(mddev); - check_disk_change(inode->i_bdev); + check_disk_change(bdev); out: return err; } -static int md_release(struct inode *inode, struct file * file) +static int md_release(struct gendisk *disk, fmode_t mode) { - mddev_t *mddev = inode->i_bdev->bd_disk->private_data; + mddev_t *mddev = disk->private_data; BUG_ON(!mddev); atomic_dec(&mddev->openers); @@ -5066,7 +5062,7 @@ static struct block_device_operations md_fops = .owner = THIS_MODULE, .open = md_open, .release = md_release, - .ioctl = md_ioctl, + .locked_ioctl = md_ioctl, .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, @@ -5428,11 +5424,11 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " super non-persistent"); if (mddev->pers) { - mddev->pers->status (seq, mddev); + mddev->pers->status(seq, mddev); seq_printf(seq, "\n "); if (mddev->pers->sync_request) { if (mddev->curr_resync > 2) { - status_resync (seq, mddev); + status_resync(seq, mddev); seq_printf(seq, "\n "); } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) seq_printf(seq, "\tresync=DELAYED\n "); @@ -5549,8 +5545,8 @@ static int is_mddev_idle(mddev_t *mddev) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = disk_stat_read(disk, sectors[0]) + - disk_stat_read(disk, sectors[1]) - + curr_events = part_stat_read(&disk->part0, sectors[0]) + + part_stat_read(&disk->part0, sectors[1]) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and @@ -5630,7 +5626,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) spin_unlock_irq(&mddev->write_lock); } if (did_change) - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); wait_event(mddev->sb_wait, !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && !test_bit(MD_CHANGE_PENDING, &mddev->flags)); @@ -5673,7 +5669,7 @@ int md_allow_write(mddev_t *mddev) mddev->safemode = 1; spin_unlock_irq(&mddev->write_lock); md_update_sb(mddev, 0); - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); } else spin_unlock_irq(&mddev->write_lock); @@ -5761,7 +5757,11 @@ void md_do_sync(mddev_t *mddev) * time 'round when curr_resync == 2 */ continue; - prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE); + /* We need to wait 'interruptible' so as not to + * contribute to the load average, and not to + * be caught by 'softlockup' + */ + prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); if (!kthread_should_stop() && mddev2->curr_resync >= mddev->curr_resync) { printk(KERN_INFO "md: delaying %s of %s" @@ -5769,6 +5769,8 @@ void md_do_sync(mddev_t *mddev) " share one or more physical units)\n", desc, mdname(mddev), mdname(mddev2)); mddev_put(mddev2); + if (signal_pending(current)) + flush_signals(current); schedule(); finish_wait(&resync_wait, &wq); goto try_again; @@ -6060,9 +6062,6 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) bitmap_daemon_work(mddev->bitmap); - if (test_and_clear_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags)) - sysfs_notify(&mddev->kobj, NULL, "array_state"); - if (mddev->ro) return; @@ -6115,7 +6114,7 @@ void md_check_recovery(mddev_t *mddev) mddev->safemode = 0; spin_unlock_irq(&mddev->write_lock); if (did_change) - sysfs_notify(&mddev->kobj, NULL, "array_state"); + sysfs_notify_dirent(mddev->sysfs_state); } if (mddev->flags) @@ -6123,7 +6122,7 @@ void md_check_recovery(mddev_t *mddev) rdev_for_each(rdev, rtmp, mddev) if (test_and_clear_bit(StateChanged, &rdev->flags)) - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && @@ -6233,7 +6232,7 @@ void md_check_recovery(mddev_t *mddev) void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) { - sysfs_notify(&rdev->kobj, NULL, "state"); + sysfs_notify_dirent(rdev->sysfs_state); wait_event_timeout(rdev->blocked_wait, !test_bit(Blocked, &rdev->flags), msecs_to_jiffies(5000)); @@ -6257,7 +6256,7 @@ static int md_notify_reboot(struct notifier_block *this, * appears to still be in use. Hence * the '100'. */ - do_md_stop (mddev, 1, 100); + do_md_stop(mddev, 1, 100); mddev_unlock(mddev); } /* @@ -6301,7 +6300,7 @@ static int __init md_init(void) raid_table_header = register_sysctl_table(raid_root_table); md_geninit(); - return (0); + return 0; } |