summaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-11-26 08:22:50 +0100
committerIngo Molnar <mingo@elte.hu>2008-11-26 08:22:50 +0100
commit7fbb8759eff9a348efa5f352ffaa51c364837c4b (patch)
treed40cd3f47b9f667ba94d9613270132080dcb6a1a /drivers/md/md.c
parent6003ab0bad4cc56f3c4fadf62a0d23a967b9c53b (diff)
parent13d428afc007fcfcd6deeb215618f54cf9c0cae6 (diff)
Merge commit 'v2.6.28-rc6' into core/debug
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c181
1 files changed, 90 insertions, 91 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4790c83d78d..1b1d32694f6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -32,31 +32,21 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/kthread.h>
-#include <linux/linkage.h>
#include <linux/raid/md.h>
#include <linux/raid/bitmap.h>
#include <linux/sysctl.h>
#include <linux/buffer_head.h> /* for invalidate_bdev */
#include <linux/poll.h>
-#include <linux/mutex.h>
#include <linux/ctype.h>
-#include <linux/freezer.h>
-
-#include <linux/init.h>
-
+#include <linux/hdreg.h>
+#include <linux/proc_fs.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
#include <linux/file.h>
-
-#ifdef CONFIG_KMOD
-#include <linux/kmod.h>
-#endif
-
-#include <asm/unaligned.h>
+#include <linux/delay.h>
#define MAJOR_NR MD_MAJOR
-#define MD_DRIVER
/* 63 partitions with the alternate major number (mdp) */
#define MdpMinorShift 6
@@ -66,7 +56,7 @@
#ifndef MODULE
-static void autostart_arrays (int part);
+static void autostart_arrays(int part);
#endif
static LIST_HEAD(pers_list);
@@ -212,7 +202,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
)
-static int md_fail_request (struct request_queue *q, struct bio *bio)
+static int md_fail_request(struct request_queue *q, struct bio *bio)
{
bio_io_error(bio);
return 0;
@@ -232,6 +222,9 @@ static void mddev_put(mddev_t *mddev)
list_del(&mddev->all_mddevs);
spin_unlock(&all_mddevs_lock);
blk_cleanup_queue(mddev->queue);
+ if (mddev->sysfs_state)
+ sysfs_put(mddev->sysfs_state);
+ mddev->sysfs_state = NULL;
kobject_put(&mddev->kobj);
} else
spin_unlock(&all_mddevs_lock);
@@ -1464,14 +1457,13 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail;
- if (rdev->bdev->bd_part)
- ko = &rdev->bdev->bd_part->dev.kobj;
- else
- ko = &rdev->bdev->bd_disk->dev.kobj;
+ ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
kobject_del(&rdev->kobj);
goto fail;
}
+ rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
+
list_add_rcu(&rdev->same_set, &mddev->disks);
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
return 0;
@@ -1501,7 +1493,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
-
+ sysfs_put(rdev->sysfs_state);
+ rdev->sysfs_state = NULL;
/* We need to delay this, otherwise we can deadlock when
* writing to 'remove' to "dev/state". We also need
* to delay it due to rcu usage.
@@ -1533,7 +1526,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
if (err) {
printk(KERN_ERR "md: could not bd_claim %s.\n",
bdevname(bdev, b));
- blkdev_put(bdev);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
return err;
}
if (!shared)
@@ -1549,7 +1542,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
if (!bdev)
MD_BUG();
bd_release(bdev);
- blkdev_put(bdev);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
}
void md_autodetect_dev(dev_t dev);
@@ -1936,8 +1929,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
err = 0;
}
- if (!err)
- sysfs_notify(&rdev->kobj, NULL, "state");
+ if (!err && rdev->sysfs_state)
+ sysfs_notify_dirent(rdev->sysfs_state);
return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
@@ -2032,7 +2025,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
rdev->raid_disk = -1;
return err;
} else
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
printk(KERN_WARNING
@@ -2049,7 +2042,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
clear_bit(Faulty, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
}
return len;
}
@@ -2109,8 +2102,6 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (strict_strtoull(buf, 10, &size) < 0)
return -EINVAL;
- if (size < my_mddev->size)
- return -EINVAL;
if (my_mddev->pers && rdev->raid_disk >= 0) {
if (my_mddev->persistent) {
size = super_types[my_mddev->major_version].
@@ -2121,9 +2112,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
size = (rdev->bdev->bd_inode->i_size >> 10);
size -= rdev->data_offset/2;
}
- if (size < my_mddev->size)
- return -EINVAL; /* component must fit device */
}
+ if (size < my_mddev->size)
+ return -EINVAL; /* component must fit device */
rdev->size = size;
if (size > oldsize && my_mddev->external) {
@@ -2409,12 +2400,11 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
int i;
unsigned long msec;
char buf[30];
- char *e;
+
/* remove a period, and count digits after it */
if (len >= sizeof(buf))
return -EINVAL;
- strlcpy(buf, cbuf, len);
- buf[len] = 0;
+ strlcpy(buf, cbuf, sizeof(buf));
for (i=0; i<len; i++) {
if (dot) {
if (isdigit(buf[i])) {
@@ -2427,8 +2417,7 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
buf[i] = 0;
}
}
- msec = simple_strtoul(buf, &e, 10);
- if (e == buf || (*e && *e != '\n'))
+ if (strict_strtoul(buf, 10, &msec) < 0)
return -EINVAL;
msec = (msec * 1000) / scale;
if (msec == 0)
@@ -2730,9 +2719,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
break;
case read_auto:
if (mddev->pers) {
- if (mddev->ro != 1)
+ if (mddev->ro == 0)
err = do_md_stop(mddev, 1, 0);
- else
+ else if (mddev->ro == 1)
err = restart_array(mddev);
if (err == 0) {
mddev->ro = 2;
@@ -2787,7 +2776,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
if (err)
return err;
else {
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
return len;
}
}
@@ -2948,7 +2937,13 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
{
int major, minor;
char *e;
- if (!list_empty(&mddev->disks))
+ /* Changing the details of 'external' metadata is
+ * always permitted. Otherwise there must be
+ * no devices attached to the array.
+ */
+ if (mddev->external && strncmp(buf, "external:", 9) == 0)
+ ;
+ else if (!list_empty(&mddev->disks))
return -EBUSY;
if (cmd_match(buf, "none")) {
@@ -3468,16 +3463,23 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
disk->fops = &md_fops;
disk->private_data = mddev;
disk->queue = mddev->queue;
+ /* Allow extended partitions. This makes the
+ * 'mdp' device redundant, but we can really
+ * remove it now.
+ */
+ disk->flags |= GENHD_FL_EXT_DEVT;
add_disk(disk);
mddev->gendisk = disk;
- error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj,
- "%s", "md");
+ error = kobject_init_and_add(&mddev->kobj, &md_ktype,
+ &disk_to_dev(disk)->kobj, "%s", "md");
mutex_unlock(&disks_mutex);
if (error)
printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
disk->disk_name);
- else
+ else {
kobject_uevent(&mddev->kobj, KOBJ_ADD);
+ mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
+ }
return NULL;
}
@@ -3488,7 +3490,7 @@ static void md_safemode_timeout(unsigned long data)
if (!atomic_read(&mddev->writes_pending)) {
mddev->safemode = 1;
if (mddev->external)
- set_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags);
+ sysfs_notify_dirent(mddev->sysfs_state);
}
md_wakeup_thread(mddev->thread);
}
@@ -3530,17 +3532,12 @@ static int do_md_run(mddev_t * mddev)
return -EINVAL;
}
/*
- * chunk-size has to be a power of 2 and multiples of PAGE_SIZE
+ * chunk-size has to be a power of 2
*/
if ( (1 << ffz(~chunk_size)) != chunk_size) {
printk(KERN_ERR "chunk_size of %d not valid\n", chunk_size);
return -EINVAL;
}
- if (chunk_size < PAGE_SIZE) {
- printk(KERN_ERR "too small chunk_size: %d < %ld\n",
- chunk_size, PAGE_SIZE);
- return -EINVAL;
- }
/* devices must have minimum size of one chunk */
rdev_for_each(rdev, tmp, mddev) {
@@ -3558,12 +3555,10 @@ static int do_md_run(mddev_t * mddev)
}
}
-#ifdef CONFIG_KMOD
if (mddev->level != LEVEL_NONE)
request_module("md-level-%d", mddev->level);
else if (mddev->clevel[0])
request_module("md-%s", mddev->clevel);
-#endif
/*
* Drop all container device buffers, from now on
@@ -3596,7 +3591,7 @@ static int do_md_run(mddev_t * mddev)
return -EINVAL;
}
}
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
}
md_probe(mddev->unit, NULL, NULL);
@@ -3758,10 +3753,10 @@ static int do_md_run(mddev_t * mddev)
mddev->changed = 1;
md_new_event(mddev);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
sysfs_notify(&mddev->kobj, NULL, "sync_action");
sysfs_notify(&mddev->kobj, NULL, "degraded");
- kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE);
+ kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
return 0;
}
@@ -3785,7 +3780,7 @@ static int restart_array(mddev_t *mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
return 0;
}
@@ -3865,7 +3860,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
module_put(mddev->pers->owner);
mddev->pers = NULL;
/* tell userspace to handle 'inactive' */
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
set_capacity(disk, 0);
mddev->changed = 1;
@@ -3945,13 +3940,14 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->degraded = 0;
mddev->barriers_work = 0;
mddev->safemode = 0;
+ kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
mdname(mddev));
err = 0;
md_new_event(mddev);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
out:
return err;
}
@@ -3974,10 +3970,10 @@ static void autorun_array(mddev_t *mddev)
}
printk("\n");
- err = do_md_run (mddev);
+ err = do_md_run(mddev);
if (err) {
printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
- do_md_stop (mddev, 0, 0);
+ do_md_stop(mddev, 0, 0);
}
}
@@ -4315,7 +4311,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
if (err)
export_rdev(rdev);
else
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
md_update_sb(mddev, 1);
if (mddev->degraded)
@@ -4336,7 +4332,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
if (!(info->state & (1<<MD_DISK_FAULTY))) {
int err;
- rdev = md_import_device (dev, -1, 0);
+ rdev = md_import_device(dev, -1, 0);
if (IS_ERR(rdev)) {
printk(KERN_WARNING
"md: error, md_import_device() returned %ld\n",
@@ -4418,7 +4414,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
return -EINVAL;
}
- rdev = md_import_device (dev, -1, 0);
+ rdev = md_import_device(dev, -1, 0);
if (IS_ERR(rdev)) {
printk(KERN_WARNING
"md: error, md_import_device() returned %ld\n",
@@ -4803,7 +4799,7 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static int md_ioctl(struct inode *inode, struct file *file,
+static int md_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
int err = 0;
@@ -4841,7 +4837,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
* Commands creating/starting a new array:
*/
- mddev = inode->i_bdev->bd_disk->private_data;
+ mddev = bdev->bd_disk->private_data;
if (!mddev) {
BUG();
@@ -4937,11 +4933,11 @@ static int md_ioctl(struct inode *inode, struct file *file,
goto done_unlock;
case STOP_ARRAY:
- err = do_md_stop (mddev, 0, 1);
+ err = do_md_stop(mddev, 0, 1);
goto done_unlock;
case STOP_ARRAY_RO:
- err = do_md_stop (mddev, 1, 1);
+ err = do_md_stop(mddev, 1, 1);
goto done_unlock;
}
@@ -4956,7 +4952,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
if (mddev->ro == 2) {
mddev->ro = 0;
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
} else {
@@ -4990,7 +4986,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
goto done_unlock;
case RUN_ARRAY:
- err = do_md_run (mddev);
+ err = do_md_run(mddev);
goto done_unlock;
case SET_BITMAP_FILE:
@@ -5014,13 +5010,13 @@ abort:
return err;
}
-static int md_open(struct inode *inode, struct file *file)
+static int md_open(struct block_device *bdev, fmode_t mode)
{
/*
* Succeed if we can lock the mddev, which confirms that
* it isn't being stopped right now.
*/
- mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
+ mddev_t *mddev = bdev->bd_disk->private_data;
int err;
if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
@@ -5031,14 +5027,14 @@ static int md_open(struct inode *inode, struct file *file)
atomic_inc(&mddev->openers);
mddev_unlock(mddev);
- check_disk_change(inode->i_bdev);
+ check_disk_change(bdev);
out:
return err;
}
-static int md_release(struct inode *inode, struct file * file)
+static int md_release(struct gendisk *disk, fmode_t mode)
{
- mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
+ mddev_t *mddev = disk->private_data;
BUG_ON(!mddev);
atomic_dec(&mddev->openers);
@@ -5066,7 +5062,7 @@ static struct block_device_operations md_fops =
.owner = THIS_MODULE,
.open = md_open,
.release = md_release,
- .ioctl = md_ioctl,
+ .locked_ioctl = md_ioctl,
.getgeo = md_getgeo,
.media_changed = md_media_changed,
.revalidate_disk= md_revalidate,
@@ -5428,11 +5424,11 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " super non-persistent");
if (mddev->pers) {
- mddev->pers->status (seq, mddev);
+ mddev->pers->status(seq, mddev);
seq_printf(seq, "\n ");
if (mddev->pers->sync_request) {
if (mddev->curr_resync > 2) {
- status_resync (seq, mddev);
+ status_resync(seq, mddev);
seq_printf(seq, "\n ");
} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
seq_printf(seq, "\tresync=DELAYED\n ");
@@ -5549,8 +5545,8 @@ static int is_mddev_idle(mddev_t *mddev)
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
- curr_events = disk_stat_read(disk, sectors[0]) +
- disk_stat_read(disk, sectors[1]) -
+ curr_events = part_stat_read(&disk->part0, sectors[0]) +
+ part_stat_read(&disk->part0, sectors[1]) -
atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
* as sync_io is counted when a request starts, and
@@ -5630,7 +5626,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
spin_unlock_irq(&mddev->write_lock);
}
if (did_change)
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -5673,7 +5669,7 @@ int md_allow_write(mddev_t *mddev)
mddev->safemode = 1;
spin_unlock_irq(&mddev->write_lock);
md_update_sb(mddev, 0);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
} else
spin_unlock_irq(&mddev->write_lock);
@@ -5761,7 +5757,11 @@ void md_do_sync(mddev_t *mddev)
* time 'round when curr_resync == 2
*/
continue;
- prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE);
+ /* We need to wait 'interruptible' so as not to
+ * contribute to the load average, and not to
+ * be caught by 'softlockup'
+ */
+ prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
mddev2->curr_resync >= mddev->curr_resync) {
printk(KERN_INFO "md: delaying %s of %s"
@@ -5769,6 +5769,8 @@ void md_do_sync(mddev_t *mddev)
" share one or more physical units)\n",
desc, mdname(mddev), mdname(mddev2));
mddev_put(mddev2);
+ if (signal_pending(current))
+ flush_signals(current);
schedule();
finish_wait(&resync_wait, &wq);
goto try_again;
@@ -6060,9 +6062,6 @@ void md_check_recovery(mddev_t *mddev)
if (mddev->bitmap)
bitmap_daemon_work(mddev->bitmap);
- if (test_and_clear_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags))
- sysfs_notify(&mddev->kobj, NULL, "array_state");
-
if (mddev->ro)
return;
@@ -6115,7 +6114,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->safemode = 0;
spin_unlock_irq(&mddev->write_lock);
if (did_change)
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
}
if (mddev->flags)
@@ -6123,7 +6122,7 @@ void md_check_recovery(mddev_t *mddev)
rdev_for_each(rdev, rtmp, mddev)
if (test_and_clear_bit(StateChanged, &rdev->flags))
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
@@ -6233,7 +6232,7 @@ void md_check_recovery(mddev_t *mddev)
void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
wait_event_timeout(rdev->blocked_wait,
!test_bit(Blocked, &rdev->flags),
msecs_to_jiffies(5000));
@@ -6257,7 +6256,7 @@ static int md_notify_reboot(struct notifier_block *this,
* appears to still be in use. Hence
* the '100'.
*/
- do_md_stop (mddev, 1, 100);
+ do_md_stop(mddev, 1, 100);
mddev_unlock(mddev);
}
/*
@@ -6301,7 +6300,7 @@ static int __init md_init(void)
raid_table_header = register_sysctl_table(raid_root_table);
md_geninit();
- return (0);
+ return 0;
}