Diffstat (limited to 'drivers/md/raid1.c')
 drivers/md/raid1.c | 179 +++++++++++++++++----------------------
 1 file changed, 74 insertions(+), 105 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fd86b372692..6e17f8181c4 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
{
struct pool_info *pi = data;
- struct page *page;
struct r1bio *r1_bio;
struct bio *bio;
int i, j;
@@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
j = 1;
while(j--) {
bio = r1_bio->bios[j];
- for (i = 0; i < RESYNC_PAGES; i++) {
- page = alloc_page(gfp_flags);
- if (unlikely(!page))
- goto out_free_pages;
+ bio->bi_vcnt = RESYNC_PAGES;
- bio->bi_io_vec[i].bv_page = page;
- bio->bi_vcnt = i+1;
- }
+ if (bio_alloc_pages(bio, gfp_flags))
+ goto out_free_bio;
}
/* If not user-requested, copy the page pointers to all bios */
if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
@@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
return r1_bio;
-out_free_pages:
- for (j=0 ; j < pi->raid_disks; j++)
- for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++)
- put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
- j = -1;
out_free_bio:
while (++j < pi->raid_disks)
bio_put(r1_bio->bios[j]);
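The two hunks above replace the hand-rolled page-allocation loop with bio_alloc_pages(), added to the block layer in the same series. A sketch of that helper as it existed in this era — it performs the same per-page loop, but unwinds its own partial allocations, which is why raid1 no longer needs the out_free_pages label:

	int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
	{
		struct bio_vec *bv;
		int i;

		/* Allocate one page per biovec; bi_vcnt is already set. */
		bio_for_each_segment_all(bv, bio, i) {
			bv->bv_page = alloc_page(gfp_mask);
			if (!bv->bv_page) {
				/* Unwind the pages allocated so far. */
				while (--bv >= bio->bi_io_vec)
					__free_page(bv->bv_page);
				return -ENOMEM;
			}
		}
		return 0;
	}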
@@ -267,7 +257,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
(bio_data_dir(bio) == WRITE) ? "write" : "read",
(unsigned long long) bio->bi_sector,
(unsigned long long) bio->bi_sector +
- (bio->bi_size >> 9) - 1);
+ bio_sectors(bio) - 1);
call_bio_endio(r1_bio);
}
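bio_sectors() here is the standard accessor from include/linux/bio.h; in this era it is a thin macro over bi_size:

	#define bio_sectors(bio)	((bio)->bi_size >> 9)

The conversion is purely mechanical, but using the accessor insulates call sites from later changes to where the bio's size is stored.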
@@ -427,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error)
r1_bio->bios[mirror] = NULL;
to_put = bio;
- set_bit(R1BIO_Uptodate, &r1_bio->state);
+ /*
+ * Do not set R1BIO_Uptodate if the current device is
+ * rebuilding or Faulty. This is because we cannot use
+ * such a device for properly reading the data back (we could
+ * potentially use it, if the current write had landed
+ * before rdev->recovery_offset, but for simplicity we don't
+ * check that here).
+ */
+ if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
+ !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
/* Maybe we can clear some bad blocks. */
if (is_badblock(conf->mirrors[mirror].rdev,
@@ -458,7 +458,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
" %llu-%llu\n",
(unsigned long long) mbio->bi_sector,
(unsigned long long) mbio->bi_sector +
- (mbio->bi_size >> 9) - 1);
+ bio_sectors(mbio) - 1);
call_bio_endio(r1_bio);
}
}
@@ -880,17 +880,17 @@ static void allow_barrier(struct r1conf *conf)
wake_up(&conf->wait_barrier);
}
-static void freeze_array(struct r1conf *conf)
+static void freeze_array(struct r1conf *conf, int extra)
{
/* stop syncio and normal IO and wait for everything to
* go quiet.
* We increment barrier and nr_waiting, and then
- * wait until nr_pending match nr_queued+1
+ * wait until nr_pending matches nr_queued+extra
* This is called in the context of one normal IO request
* that has failed. Thus any sync request that might be pending
* will be blocked by nr_pending, and we need to wait for
* pending IO requests to complete or be queued for re-try.
- * Thus the number queued (nr_queued) plus this request (1)
+ * Thus the number queued (nr_queued) plus this request (extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
@@ -898,7 +898,7 @@ static void freeze_array(struct r1conf *conf)
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier,
- conf->nr_pending == conf->nr_queued+1,
+ conf->nr_pending == conf->nr_queued+extra,
conf->resync_lock,
flush_pending_writes(conf));
spin_unlock_irq(&conf->resync_lock);
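freeze_array(conf, extra) pairs with unfreeze_array(), which this patch leaves alone; for context, its body in raid1.c of this era is essentially:

	static void unfreeze_array(struct r1conf *conf)
	{
		/* Reverse the effect of the freeze */
		spin_lock_irq(&conf->resync_lock);
		conf->barrier--;
		conf->nr_waiting--;
		wake_up(&conf->wait_barrier);
		spin_unlock_irq(&conf->resync_lock);
	}

Callers that hold no pending request of their own (raid1_add_disk, raid1_remove_disk, and raid1_reshape below) pass extra=0; handle_read_error, which runs on behalf of one already-counted failed request, passes extra=1.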
@@ -925,7 +925,7 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
if (unlikely(!bvecs))
return;
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i) {
bvecs[i] = *bvec;
bvecs[i].bv_page = alloc_page(GFP_NOIO);
if (unlikely(!bvecs[i].bv_page))
@@ -981,7 +981,12 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
bio->bi_next = NULL;
- generic_make_request(bio);
+ if (unlikely((bio->bi_rw & REQ_DISCARD) &&
+ !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ /* Just ignore it */
+ bio_endio(bio, 0);
+ else
+ generic_make_request(bio);
bio = next;
}
kfree(plug);
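blk_queue_discard() is the standard queue-flag test from include/linux/blkdev.h:

	#define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)

Completing an unsupported DISCARD with bio_endio(bio, 0) rather than forwarding it avoids spurious errors when the mirror legs sit on devices with differing discard support.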
@@ -1018,7 +1023,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
md_write_start(mddev, bio); /* wait on superblock update early */
if (bio_data_dir(bio) == WRITE &&
- bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
+ bio_end_sector(bio) > mddev->suspend_lo &&
bio->bi_sector < mddev->suspend_hi) {
/* As the suspend_* range is controlled by
* userspace, we want an interruptible
@@ -1029,7 +1034,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
flush_signals(current);
prepare_to_wait(&conf->wait_barrier,
&w, TASK_INTERRUPTIBLE);
- if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
+ if (bio_end_sector(bio) <= mddev->suspend_lo ||
bio->bi_sector >= mddev->suspend_hi)
break;
schedule();
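bio_end_sector(), introduced alongside these cleanups, packages the open-coded start-plus-length arithmetic:

	#define bio_end_sector(bio)	((bio)->bi_sector + bio_sectors((bio)))

Since bio_sectors() is bi_size >> 9, this is exactly the bi_sector + bi_size/512 expression it replaces.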
@@ -1049,7 +1054,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = bio->bi_size >> 9;
+ r1_bio->sectors = bio_sectors(bio);
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector;
@@ -1127,7 +1132,7 @@ read_again:
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1284,14 +1289,10 @@ read_again:
struct bio_vec *bvec;
int j;
- /* Yes, I really want the '__' version so that
- * we clear any unused pointer in the io_vec, rather
- * than leave them unchanged. This is important
- * because when we come to free the pages, we won't
- * know the original bi_idx, so we just free
- * them all
+ /*
+ * We trimmed the bio, so _all is legit
*/
- __bio_for_each_segment(bvec, mbio, j, 0)
+ bio_for_each_segment_all(bvec, mbio, j)
bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
@@ -1329,14 +1330,14 @@ read_again:
/* Mustn't call r1_bio_write_done before this next test,
* as it could result in the bio being freed.
*/
- if (sectors_handled < (bio->bi_size >> 9)) {
+ if (sectors_handled < bio_sectors(bio)) {
r1_bio_write_done(r1_bio);
/* We need another r1_bio. It has already been counted
* in bio->bi_phys_segments
*/
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1553,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* we wait for all outstanding requests to complete.
*/
synchronize_sched();
- raise_barrier(conf);
- lower_barrier(conf);
+ freeze_array(conf, 0);
+ unfreeze_array(conf);
clear_bit(Unmerged, &rdev->flags);
}
md_integrity_add_rdev(rdev, mddev);
@@ -1604,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
*/
struct md_rdev *repl =
conf->mirrors[conf->raid_disks + number].rdev;
- raise_barrier(conf);
+ freeze_array(conf, 0);
clear_bit(Replacement, &repl->flags);
p->rdev = repl;
conf->mirrors[conf->raid_disks + number].rdev = NULL;
- lower_barrier(conf);
+ unfreeze_array(conf);
clear_bit(WantReplacement, &rdev->flags);
} else
clear_bit(WantReplacement, &rdev->flags);
@@ -1862,7 +1863,7 @@ static int process_checks(struct r1bio *r1_bio)
struct bio *sbio = r1_bio->bios[i];
int size;
- if (r1_bio->bios[i]->bi_end_io != end_sync_read)
+ if (sbio->bi_end_io != end_sync_read)
continue;
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
@@ -1887,16 +1888,15 @@ static int process_checks(struct r1bio *r1_bio)
continue;
}
/* fixup the bio for reuse */
+ bio_reset(sbio);
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
- sbio->bi_idx = 0;
- sbio->bi_phys_segments = 0;
- sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
- sbio->bi_flags |= 1 << BIO_UPTODATE;
- sbio->bi_next = NULL;
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ sbio->bi_end_io = end_sync_read;
+ sbio->bi_private = r1_bio;
+
size = sbio->bi_size;
for (j = 0; j < vcnt ; j++) {
struct bio_vec *bi;
@@ -1907,10 +1907,9 @@ static int process_checks(struct r1bio *r1_bio)
else
bi->bv_len = size;
size -= PAGE_SIZE;
- memcpy(page_address(bi->bv_page),
- page_address(pbio->bi_io_vec[j].bv_page),
- PAGE_SIZE);
}
+
+ bio_copy_data(sbio, pbio);
}
return 0;
}
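Two generic helpers carry this hunk: bio_reset(), which reinitializes a bio while preserving only the fields that survive reuse (the pool, bi_max_vecs and bi_io_vec), and bio_copy_data(dst, src), which copies page contents segment by segment and replaces the open-coded memcpy loop. bio_reset() in fs/bio.c of this era is approximately:

	void bio_reset(struct bio *bio)
	{
		unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);

		__bio_free(bio);

		memset(bio, 0, BIO_RESET_BYTES);
		bio->bi_flags = flags | (1 << BIO_UPTODATE);
	}

Everything below BIO_RESET_BYTES is zeroed, which is why the hunk must re-establish bi_end_io and bi_private after the call.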
@@ -1947,7 +1946,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
wbio->bi_rw = WRITE;
wbio->bi_end_io = end_sync_write;
atomic_inc(&r1_bio->remaining);
- md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
+ md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
generic_make_request(wbio);
}
@@ -2059,32 +2058,11 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
}
}
-static void bi_complete(struct bio *bio, int error)
-{
- complete((struct completion *)bio->bi_private);
-}
-
-static int submit_bio_wait(int rw, struct bio *bio)
-{
- struct completion event;
- rw |= REQ_SYNC;
-
- init_completion(&event);
- bio->bi_private = &event;
- bio->bi_end_io = bi_complete;
- submit_bio(rw, bio);
- wait_for_completion(&event);
-
- return test_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
static int narrow_write_error(struct r1bio *r1_bio, int i)
{
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
struct md_rdev *rdev = conf->mirrors[i].rdev;
- int vcnt, idx;
- struct bio_vec *vec;
/* bio has the data to be written to device 'i' where
* we just recently had a write error.
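The deleted bi_complete()/submit_bio_wait() pair was promoted to the generic block layer in the same series. The fs/bio.c version is structured the same way, but reports the completion error through bi_private instead of reading the BIO_UPTODATE bit, roughly:

	struct submit_bio_ret {
		struct completion event;
		int error;
	};

	static void submit_bio_wait_endio(struct bio *bio, int error)
	{
		struct submit_bio_ret *ret = bio->bi_private;

		ret->error = error;
		complete(&ret->event);
	}

	int submit_bio_wait(int rw, struct bio *bio)
	{
		struct submit_bio_ret ret;

		rw |= REQ_SYNC;
		init_completion(&ret.event);
		bio->bi_private = &ret;
		bio->bi_end_io = submit_bio_wait_endio;
		submit_bio(rw, bio);
		wait_for_completion(&ret.event);

		return ret.error;
	}

Note the inverted convention: the old local helper returned non-zero on success, while the generic one returns 0 on success and a negative error otherwise, so callers test for a negative return.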
@@ -2112,30 +2090,32 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
& ~(sector_t)(block_sectors - 1))
- sector;
- if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- vcnt = r1_bio->behind_page_count;
- vec = r1_bio->behind_bvecs;
- idx = 0;
- while (vec[idx].bv_page == NULL)
- idx++;
- } else {
- vcnt = r1_bio->master_bio->bi_vcnt;
- vec = r1_bio->master_bio->bi_io_vec;
- idx = r1_bio->master_bio->bi_idx;
- }
while (sect_to_write) {
struct bio *wbio;
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */
- wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
- memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
- wbio->bi_sector = r1_bio->sector;
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+ unsigned vcnt = r1_bio->behind_page_count;
+ struct bio_vec *vec = r1_bio->behind_bvecs;
+
+ while (!vec->bv_page) {
+ vec++;
+ vcnt--;
+ }
+
+ wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+ memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+
+ wbio->bi_vcnt = vcnt;
+ } else {
+ wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+ }
+
wbio->bi_rw = WRITE;
- wbio->bi_vcnt = vcnt;
+ wbio->bi_sector = r1_bio->sector;
wbio->bi_size = r1_bio->sectors << 9;
- wbio->bi_idx = idx;
md_trim_bio(wbio, sector - r1_bio->sector, sectors);
wbio->bi_sector += rdev->data_offset;
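The reworked loop clones the master bio (or the behind-bvecs copy) and trims it with md_trim_bio(), instead of hand-copying the biovec and fixing up bi_idx. The chunking still follows the block_sectors alignment computed earlier in narrow_write_error(); a worked example with hypothetical values — a device with 4KiB logical blocks (block_sectors = 8) and a failed write starting at sector 13:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long sector = 13;		/* hypothetical bad-range start */
		unsigned long long block_sectors = 8;	/* 4KiB logical blocks */
		unsigned long long first = ((sector + block_sectors)
					    & ~(block_sectors - 1)) - sector;

		/* Prints 3: the first sub-write stops at the next
		 * 8-sector boundary, so every wbio stays inside one
		 * device block. */
		printf("first sub-write: %llu sectors\n", first);
		return 0;
	}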
@@ -2225,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
* frozen
*/
if (mddev->ro == 0) {
- freeze_array(conf);
+ freeze_array(conf, 1);
fix_read_error(conf, r1_bio->read_disk,
r1_bio->sector, r1_bio->sectors);
unfreeze_array(conf);
@@ -2284,8 +2264,7 @@ read_more:
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = mbio;
- r1_bio->sectors = (mbio->bi_size >> 9)
- - sectors_handled;
+ r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
r1_bio->state = 0;
set_bit(R1BIO_ReadError, &r1_bio->state);
r1_bio->mddev = mddev;
@@ -2459,18 +2438,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev;
bio = r1_bio->bios[i];
-
- /* take from bio_init */
- bio->bi_next = NULL;
- bio->bi_flags &= ~(BIO_POOL_MASK-1);
- bio->bi_flags |= 1 << BIO_UPTODATE;
- bio->bi_rw = READ;
- bio->bi_vcnt = 0;
- bio->bi_idx = 0;
- bio->bi_phys_segments = 0;
- bio->bi_size = 0;
- bio->bi_end_io = NULL;
- bio->bi_private = NULL;
+ bio_reset(bio);
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
@@ -2822,8 +2790,8 @@ static int run(struct mddev *mddev)
return PTR_ERR(conf);
if (mddev->queue)
- blk_queue_max_write_same_sectors(mddev->queue,
- mddev->chunk_sectors);
+ blk_queue_max_write_same_sectors(mddev->queue, 0);
+
rdev_for_each(rdev, mddev) {
if (!mddev->gendisk)
continue;
@@ -2901,6 +2869,7 @@ static int stop(struct mddev *mddev)
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
+ safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
kfree(conf);
mddev->private = NULL;
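safe_put_page() is md's null-tolerant page release helper from drivers/md/md.h:

	static inline void safe_put_page(struct page *p)
	{
		if (p) put_page(p);
	}

Adding it here releases conf->tmppage, which is allocated when the conf is set up but was not freed on stop, a small per-array memory leak.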
@@ -3004,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev)
return -ENOMEM;
}
- raise_barrier(conf);
+ freeze_array(conf, 0);
/* ok, everything is stopped */
oldpool = conf->r1bio_pool;
@@ -3035,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev)
conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0;
- lower_barrier(conf);
+ unfreeze_array(conf);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);