author     Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>   2012-09-12 11:14:33 -0400
committer  Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>   2012-09-12 11:14:33 -0400
commit     25a765b7f05cb8460fa01b54568894b20e184862 (patch)
tree       0b56db57b4d9f912393ab303c269e0fe6cdf8635 /drivers/md/raid5.c
parent     9d2be9287107695708e6aae5105a8a518a6cb4d0 (diff)
parent     64282278989d5b0398dcb3ba7904cb00c621dc35 (diff)
Merge branch 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into stable/for-linus-3.7
* 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (9690 commits)
x86: Document x86_init.paging.pagetable_init()
x86: xen: Cleanup and remove x86_init.paging.pagetable_setup_done()
x86: Move paging_init() call to x86_init.paging.pagetable_init()
x86: Rename pagetable_setup_start() to pagetable_init()
x86: Remove base argument from x86_init.paging.pagetable_setup_start
Linux 3.6-rc5
HID: tpkbd: work even if the new Lenovo Keyboard driver is not configured
Remove user-triggerable BUG from mpol_to_str
xen/pciback: Fix proper FLR steps.
uml: fix compile error in deliver_alarm()
dj: memory scribble in logi_dj
Fix order of arguments to compat_put_time[spec|val]
xen: Use correct masking in xen_swiotlb_alloc_coherent.
xen: fix logical error in tlb flushing
xen/p2m: Fix one-off error in checking the P2M tree directory.
powerpc: Don't use __put_user() in patch_instruction
powerpc: Make sure IPI handlers see data written by IPI senders
powerpc: Restore correct DSCR in context switch
powerpc: Fix DSCR inheritance in copy_thread()
powerpc: Keep thread.dscr and thread.dscr_inherit in sync
...
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--   drivers/md/raid5.c   317
1 file changed, 205 insertions, 112 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 04348d76bb3..adda94df5eb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
  * We maintain a biased count of active stripes in the bottom 16 bits of
  * bi_phys_segments, and a count of processed stripes in the upper 16 bits
  */
-static inline int raid5_bi_phys_segments(struct bio *bio)
+static inline int raid5_bi_processed_stripes(struct bio *bio)
 {
-	return bio->bi_phys_segments & 0xffff;
+	atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+	return (atomic_read(segments) >> 16) & 0xffff;
 }
 
-static inline int raid5_bi_hw_segments(struct bio *bio)
+static inline int raid5_dec_bi_active_stripes(struct bio *bio)
 {
-	return (bio->bi_phys_segments >> 16) & 0xffff;
+	atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+	return atomic_sub_return(1, segments) & 0xffff;
 }
 
-static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+static inline void raid5_inc_bi_active_stripes(struct bio *bio)
 {
-	--bio->bi_phys_segments;
-	return raid5_bi_phys_segments(bio);
+	atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+	atomic_inc(segments);
 }
 
-static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+static inline void raid5_set_bi_processed_stripes(struct bio *bio,
+	unsigned int cnt)
 {
-	unsigned short val = raid5_bi_hw_segments(bio);
+	atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+	int old, new;
 
-	--val;
-	bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
-	return val;
+	do {
+		old = atomic_read(segments);
+		new = (old & 0xffff) | (cnt << 16);
+	} while (atomic_cmpxchg(segments, old, new) != old);
 }
 
-static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
 {
-	bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
+	atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+	atomic_set(segments, cnt);
 }
 
 /* Find first data disk in a raid6 stripe */
@@ -190,49 +196,56 @@ static int stripe_operations_active(struct stripe_head *sh)
 	       test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
-	if (atomic_dec_and_test(&sh->count)) {
-		BUG_ON(!list_empty(&sh->lru));
-		BUG_ON(atomic_read(&conf->active_stripes)==0);
-		if (test_bit(STRIPE_HANDLE, &sh->state)) {
-			if (test_bit(STRIPE_DELAYED, &sh->state) &&
-			    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-				list_add_tail(&sh->lru, &conf->delayed_list);
-			else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-				   sh->bm_seq - conf->seq_write > 0)
-				list_add_tail(&sh->lru, &conf->bitmap_list);
-			else {
-				clear_bit(STRIPE_DELAYED, &sh->state);
-				clear_bit(STRIPE_BIT_DELAY, &sh->state);
-				list_add_tail(&sh->lru, &conf->handle_list);
-			}
-			md_wakeup_thread(conf->mddev->thread);
-		} else {
-			BUG_ON(stripe_operations_active(sh));
-			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-				if (atomic_dec_return(&conf->preread_active_stripes)
-				    < IO_THRESHOLD)
-					md_wakeup_thread(conf->mddev->thread);
-			atomic_dec(&conf->active_stripes);
-			if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
-				list_add_tail(&sh->lru, &conf->inactive_list);
-				wake_up(&conf->wait_for_stripe);
-				if (conf->retry_read_aligned)
-					md_wakeup_thread(conf->mddev->thread);
-			}
+	BUG_ON(!list_empty(&sh->lru));
+	BUG_ON(atomic_read(&conf->active_stripes)==0);
+	if (test_bit(STRIPE_HANDLE, &sh->state)) {
+		if (test_bit(STRIPE_DELAYED, &sh->state) &&
+		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			list_add_tail(&sh->lru, &conf->delayed_list);
+		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+			   sh->bm_seq - conf->seq_write > 0)
+			list_add_tail(&sh->lru, &conf->bitmap_list);
+		else {
+			clear_bit(STRIPE_DELAYED, &sh->state);
+			clear_bit(STRIPE_BIT_DELAY, &sh->state);
+			list_add_tail(&sh->lru, &conf->handle_list);
+		}
+		md_wakeup_thread(conf->mddev->thread);
+	} else {
+		BUG_ON(stripe_operations_active(sh));
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			if (atomic_dec_return(&conf->preread_active_stripes)
+			    < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		atomic_dec(&conf->active_stripes);
+		if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
+			list_add_tail(&sh->lru, &conf->inactive_list);
+			wake_up(&conf->wait_for_stripe);
+			if (conf->retry_read_aligned)
+				md_wakeup_thread(conf->mddev->thread);
 		}
 	}
 }
 
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+{
+	if (atomic_dec_and_test(&sh->count))
+		do_release_stripe(conf, sh);
+}
+
 static void release_stripe(struct stripe_head *sh)
 {
 	struct r5conf *conf = sh->raid_conf;
 	unsigned long flags;
 
-	spin_lock_irqsave(&conf->device_lock, flags);
-	__release_stripe(conf, sh);
-	spin_unlock_irqrestore(&conf->device_lock, flags);
+	local_irq_save(flags);
+	if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
+		do_release_stripe(conf, sh);
+		spin_unlock(&conf->device_lock);
+	}
+	local_irq_restore(flags);
 }
 
 static inline void remove_hash(struct stripe_head *sh)
@@ -471,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 		} else {
 			if (atomic_read(&sh->count)) {
 				BUG_ON(!list_empty(&sh->lru)
-				    && !test_bit(STRIPE_EXPANDING, &sh->state));
+				    && !test_bit(STRIPE_EXPANDING, &sh->state)
+				    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
@@ -640,6 +654,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			else
 				bi->bi_sector = (sh->sector
 						 + rdev->data_offset);
+			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+				bi->bi_rw |= REQ_FLUSH;
+
 			bi->bi_flags = 1 << BIO_UPTODATE;
 			bi->bi_idx = 0;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -749,14 +766,12 @@ static void ops_complete_biofill(void *stripe_head_ref)
 {
 	struct stripe_head *sh = stripe_head_ref;
 	struct bio *return_bi = NULL;
-	struct r5conf *conf = sh->raid_conf;
 	int i;
 
 	pr_debug("%s: stripe %llu\n", __func__,
 		(unsigned long long)sh->sector);
 
 	/* clear completed biofills */
-	spin_lock_irq(&conf->device_lock);
 	for (i = sh->disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
@@ -774,7 +789,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
 			while (rbi && rbi->bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				rbi2 = r5_next_bio(rbi, dev->sector);
-				if (!raid5_dec_bi_phys_segments(rbi)) {
+				if (!raid5_dec_bi_active_stripes(rbi)) {
 					rbi->bi_next = return_bi;
 					return_bi = rbi;
 				}
@@ -782,7 +797,6 @@ static void ops_complete_biofill(void *stripe_head_ref)
 			}
 		}
 	}
-	spin_unlock_irq(&conf->device_lock);
 	clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
 
 	return_io(return_bi);
@@ -794,7 +808,6 @@ static void ops_run_biofill(struct stripe_head *sh)
 {
 	struct dma_async_tx_descriptor *tx = NULL;
-	struct r5conf *conf = sh->raid_conf;
 	struct async_submit_ctl submit;
 	int i;
 
@@ -805,10 +818,10 @@ static void ops_run_biofill(struct stripe_head *sh)
 		struct r5dev *dev = &sh->dev[i];
 		if (test_bit(R5_Wantfill, &dev->flags)) {
 			struct bio *rbi;
-			spin_lock_irq(&conf->device_lock);
+			spin_lock_irq(&sh->stripe_lock);
 			dev->read = rbi = dev->toread;
 			dev->toread = NULL;
-			spin_unlock_irq(&conf->device_lock);
+			spin_unlock_irq(&sh->stripe_lock);
 			while (rbi && rbi->bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				tx = async_copy_data(0, rbi, dev->page,
@@ -1144,12 +1157,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 		if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
 			struct bio *wbi;
 
-			spin_lock_irq(&sh->raid_conf->device_lock);
+			spin_lock_irq(&sh->stripe_lock);
 			chosen = dev->towrite;
 			dev->towrite = NULL;
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
-			spin_unlock_irq(&sh->raid_conf->device_lock);
+			spin_unlock_irq(&sh->stripe_lock);
 
 			while (wbi && wbi->bi_sector <
 				dev->sector + STRIPE_SECTORS) {
@@ -1454,6 +1467,8 @@ static int grow_one_stripe(struct r5conf *conf)
 	init_waitqueue_head(&sh->ops.wait_for_ops);
 	#endif
 
+	spin_lock_init(&sh->stripe_lock);
+
 	if (grow_buffers(sh)) {
 		shrink_buffers(sh);
 		kmem_cache_free(conf->slab_cache, sh);
@@ -1739,7 +1754,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
 			atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
-		}
+		} else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+			clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
+
 		if (atomic_read(&rdev->read_errors))
 			atomic_set(&rdev->read_errors, 0);
 	} else {
@@ -1784,7 +1801,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
 		else
 			retry = 1;
 		if (retry)
-			set_bit(R5_ReadError, &sh->dev[i].flags);
+			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
+				set_bit(R5_ReadError, &sh->dev[i].flags);
+				clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
+			} else
+				set_bit(R5_ReadNoMerge, &sh->dev[i].flags);
 		else {
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
@@ -2340,11 +2361,18 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 		(unsigned long long)bi->bi_sector,
 		(unsigned long long)sh->sector);
 
-
-	spin_lock_irq(&conf->device_lock);
+	/*
+	 * If several bio share a stripe. The bio bi_phys_segments acts as a
+	 * reference count to avoid race. The reference count should already be
+	 * increased before this function is called (for example, in
+	 * make_request()), so other bio sharing this stripe will not free the
+	 * stripe. If a stripe is owned by one stripe, the stripe lock will
+	 * protect it.
+	 */
+	spin_lock_irq(&sh->stripe_lock);
 	if (forwrite) {
 		bip = &sh->dev[dd_idx].towrite;
-		if (*bip == NULL && sh->dev[dd_idx].written == NULL)
+		if (*bip == NULL)
 			firstwrite = 1;
 	} else
 		bip = &sh->dev[dd_idx].toread;
@@ -2360,7 +2388,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 	if (*bip)
 		bi->bi_next = *bip;
 	*bip = bi;
-	bi->bi_phys_segments++;
+	raid5_inc_bi_active_stripes(bi);
 
 	if (forwrite) {
 		/* check if page is covered */
@@ -2375,7 +2403,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
 			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
 	}
-	spin_unlock_irq(&conf->device_lock);
+	spin_unlock_irq(&sh->stripe_lock);
 
 	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
 		(unsigned long long)(*bip)->bi_sector,
@@ -2391,7 +2419,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 
  overlap:
 	set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
-	spin_unlock_irq(&conf->device_lock);
+	spin_unlock_irq(&sh->stripe_lock);
 	return 0;
 }
 
@@ -2441,10 +2469,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 				rdev_dec_pending(rdev, conf->mddev);
 			}
 		}
-		spin_lock_irq(&conf->device_lock);
+		spin_lock_irq(&sh->stripe_lock);
 		/* fail all writes first */
 		bi = sh->dev[i].towrite;
 		sh->dev[i].towrite = NULL;
+		spin_unlock_irq(&sh->stripe_lock);
 		if (bi) {
 			s->to_write--;
 			bitmap_end = 1;
 		}
@@ -2457,13 +2486,17 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
-			if (!raid5_dec_bi_phys_segments(bi)) {
+			if (!raid5_dec_bi_active_stripes(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
 				*return_bi = bi;
 			}
 			bi = nextbi;
 		}
+		if (bitmap_end)
+			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+					STRIPE_SECTORS, 0, 0);
+		bitmap_end = 0;
 		/* and fail all 'written' */
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
@@ -2472,7 +2505,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
-			if (!raid5_dec_bi_phys_segments(bi)) {
+			if (!raid5_dec_bi_active_stripes(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
 				*return_bi = bi;
@@ -2496,14 +2529,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
 				clear_bit(BIO_UPTODATE, &bi->bi_flags);
-				if (!raid5_dec_bi_phys_segments(bi)) {
+				if (!raid5_dec_bi_active_stripes(bi)) {
 					bi->bi_next = *return_bi;
 					*return_bi = bi;
 				}
 				bi = nextbi;
 			}
 		}
-		spin_unlock_irq(&conf->device_lock);
 		if (bitmap_end)
 			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
 					STRIPE_SECTORS, 0, 0);
@@ -2707,30 +2739,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 			    test_bit(R5_UPTODATE, &dev->flags)) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
-				int bitmap_end = 0;
 				pr_debug("Return write for disc %d\n", i);
-				spin_lock_irq(&conf->device_lock);
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_sector <
 					dev->sector + STRIPE_SECTORS) {
 					wbi2 = r5_next_bio(wbi, dev->sector);
-					if (!raid5_dec_bi_phys_segments(wbi)) {
+					if (!raid5_dec_bi_active_stripes(wbi)) {
 						md_write_end(conf->mddev);
 						wbi->bi_next = *return_bi;
 						*return_bi = wbi;
 					}
 					wbi = wbi2;
 				}
-				if (dev->towrite == NULL)
-					bitmap_end = 1;
-				spin_unlock_irq(&conf->device_lock);
-				if (bitmap_end)
-					bitmap_endwrite(conf->mddev->bitmap,
-							sh->sector,
-							STRIPE_SECTORS,
+				bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+						STRIPE_SECTORS,
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
-							0);
+						0);
 			}
 		}
@@ -3182,7 +3207,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 
 	/* Now to look around and see what can be done */
 	rcu_read_lock();
-	spin_lock_irq(&conf->device_lock);
 	for (i=disks; i--; ) {
 		struct md_rdev *rdev;
 		sector_t first_bad;
@@ -3328,7 +3352,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 				do_recovery = 1;
 		}
 	}
-	spin_unlock_irq(&conf->device_lock);
 	if (test_bit(STRIPE_SYNCING, &sh->state)) {
 		/* If there is a failed device being replaced,
 		 * we must be recovering.
@@ -3791,7 +3814,7 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf)
 		 * this sets the active strip count to 1 and the processed
 		 * strip count to zero (upper 8 bits)
 		 */
-		bi->bi_phys_segments = 1; /* biased count of active stripes */
+		raid5_set_bi_stripes(bi, 1); /* biased count of active stripes */
 	}
 
 	return bi;
@@ -3988,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
 	return sh;
 }
 
+struct raid5_plug_cb {
+	struct blk_plug_cb	cb;
+	struct list_head	list;
+};
+
+static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
+{
+	struct raid5_plug_cb *cb = container_of(
+		blk_cb, struct raid5_plug_cb, cb);
+	struct stripe_head *sh;
+	struct mddev *mddev = cb->cb.data;
+	struct r5conf *conf = mddev->private;
+
+	if (cb->list.next && !list_empty(&cb->list)) {
+		spin_lock_irq(&conf->device_lock);
+		while (!list_empty(&cb->list)) {
+			sh = list_first_entry(&cb->list, struct stripe_head, lru);
+			list_del_init(&sh->lru);
+			/*
+			 * avoid race release_stripe_plug() sees
+			 * STRIPE_ON_UNPLUG_LIST clear but the stripe
+			 * is still in our list
+			 */
+			smp_mb__before_clear_bit();
+			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+			__release_stripe(conf, sh);
+		}
+		spin_unlock_irq(&conf->device_lock);
+	}
+	kfree(cb);
+}
+
+static void release_stripe_plug(struct mddev *mddev,
+				struct stripe_head *sh)
+{
+	struct blk_plug_cb *blk_cb = blk_check_plugged(
+		raid5_unplug, mddev,
+		sizeof(struct raid5_plug_cb));
+	struct raid5_plug_cb *cb;
+
+	if (!blk_cb) {
+		release_stripe(sh);
+		return;
+	}
+
+	cb = container_of(blk_cb, struct raid5_plug_cb, cb);
+
+	if (cb->list.next == NULL)
+		INIT_LIST_HEAD(&cb->list);
+
+	if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+		list_add_tail(&sh->lru, &cb->list);
+	else
+		release_stripe(sh);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
 	struct r5conf *conf = mddev->private;
@@ -4113,11 +4192,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			finish_wait(&conf->wait_for_overlap, &w);
 			set_bit(STRIPE_HANDLE, &sh->state);
 			clear_bit(STRIPE_DELAYED, &sh->state);
-			if ((bi->bi_rw & REQ_SYNC) &&
+			if ((bi->bi_rw & REQ_NOIDLE) &&
 			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
-			mddev_check_plugged(mddev);
-			release_stripe(sh);
+			release_stripe_plug(mddev, sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4126,9 +4204,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 		}
 	}
 
-	spin_lock_irq(&conf->device_lock);
-	remaining = raid5_dec_bi_phys_segments(bi);
-	spin_unlock_irq(&conf->device_lock);
+	remaining = raid5_dec_bi_active_stripes(bi);
 	if (remaining == 0) {
 
 		if ( rw == WRITE )
@@ -4484,7 +4560,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 	     sector += STRIPE_SECTORS,
 		     scnt++) {
 
-		if (scnt < raid5_bi_hw_segments(raid_bio))
+		if (scnt < raid5_bi_processed_stripes(raid_bio))
 			/* already done this stripe */
 			continue;
@@ -4492,25 +4568,24 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
-			raid5_set_bi_hw_segments(raid_bio, scnt);
+			raid5_set_bi_processed_stripes(raid_bio, scnt);
 			conf->retry_read_aligned = raid_bio;
 			return handled;
 		}
 
 		if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
 			release_stripe(sh);
-			raid5_set_bi_hw_segments(raid_bio, scnt);
+			raid5_set_bi_processed_stripes(raid_bio, scnt);
 			conf->retry_read_aligned = raid_bio;
 			return handled;
 		}
 
+		set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags);
 		handle_stripe(sh);
 		release_stripe(sh);
 		handled++;
 	}
-	spin_lock_irq(&conf->device_lock);
-	remaining = raid5_dec_bi_phys_segments(raid_bio);
-	spin_unlock_irq(&conf->device_lock);
+	remaining = raid5_dec_bi_active_stripes(raid_bio);
 	if (remaining == 0)
 		bio_endio(raid_bio, 0);
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
@@ -4518,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 	return handled;
 }
 
+#define MAX_STRIPE_BATCH 8
+static int handle_active_stripes(struct r5conf *conf)
+{
+	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
+	int i, batch_size = 0;
+
+	while (batch_size < MAX_STRIPE_BATCH &&
+			(sh = __get_priority_stripe(conf)) != NULL)
+		batch[batch_size++] = sh;
+
+	if (batch_size == 0)
+		return batch_size;
+	spin_unlock_irq(&conf->device_lock);
+
+	for (i = 0; i < batch_size; i++)
+		handle_stripe(batch[i]);
+
+	cond_resched();
+
+	spin_lock_irq(&conf->device_lock);
+	for (i = 0; i < batch_size; i++)
+		__release_stripe(conf, batch[i]);
+	return batch_size;
+}
 
 /*
  * This is our raid5 kernel thread.
@@ -4528,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
  */
 static void raid5d(struct mddev *mddev)
 {
-	struct stripe_head *sh;
 	struct r5conf *conf = mddev->private;
 	int handled;
 	struct blk_plug plug;
@@ -4542,8 +4640,9 @@ static void raid5d(struct mddev *mddev)
 	spin_lock_irq(&conf->device_lock);
 	while (1) {
 		struct bio *bio;
+		int batch_size;
 
-		if (atomic_read(&mddev->plug_cnt) == 0 &&
+		if (
 		    !list_empty(&conf->bitmap_list)) {
 			/* Now is a good time to flush some bitmap updates */
 			conf->seq_flush++;
@@ -4553,8 +4652,7 @@ static void raid5d(struct mddev *mddev)
 			conf->seq_write = conf->seq_flush;
 			activate_bit_delay(conf);
 		}
-		if (atomic_read(&mddev->plug_cnt) == 0)
-			raid5_activate_delayed(conf);
+		raid5_activate_delayed(conf);
 
 		while ((bio = remove_bio_from_retry(conf))) {
 			int ok;
@@ -4566,21 +4664,16 @@ static void raid5d(struct mddev *mddev)
 			handled++;
 		}
 
-		sh = __get_priority_stripe(conf);
-
-		if (!sh)
+		batch_size = handle_active_stripes(conf);
+		if (!batch_size)
 			break;
-		spin_unlock_irq(&conf->device_lock);
-
-		handled++;
-		handle_stripe(sh);
-		release_stripe(sh);
-		cond_resched();
+		handled += batch_size;
 
-		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+		if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
+			spin_unlock_irq(&conf->device_lock);
 			md_check_recovery(mddev);
-
-		spin_lock_irq(&conf->device_lock);
+			spin_lock_irq(&conf->device_lock);
+		}
 	}
 	pr_debug("%d stripes handled\n", handled);
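
The central change visible in this diff is that bio->bi_phys_segments is no longer guarded by conf->device_lock: it is treated as two packed 16-bit counters (active stripes in the low half, processed stripes in the high half) updated with atomic operations, while per-device bio lists move under the new per-stripe sh->stripe_lock. Below is a minimal, stand-alone user-space sketch of that packed-counter layout, using C11 <stdatomic.h> in place of the kernel's atomic_t API; the function names and the main() driver are illustrative only and are not part of the patch.

#include <stdatomic.h>
#include <stdio.h>

/* Plays the role of bio->bi_phys_segments: low 16 bits count active
 * stripes, high 16 bits count processed stripes. */
static atomic_uint segments;

static unsigned int processed_stripes(void)
{
	return (atomic_load(&segments) >> 16) & 0xffff;
}

static unsigned int dec_active_stripes(void)
{
	/* the kernel helper uses atomic_sub_return(1, ...) & 0xffff */
	return (atomic_fetch_sub(&segments, 1) - 1) & 0xffff;
}

static void inc_active_stripes(void)
{
	atomic_fetch_add(&segments, 1);
}

static void set_processed_stripes(unsigned int cnt)
{
	unsigned int old, new;

	/* same cmpxchg loop shape as raid5_set_bi_processed_stripes() */
	do {
		old = atomic_load(&segments);
		new = (old & 0xffff) | (cnt << 16);
	} while (!atomic_compare_exchange_weak(&segments, &old, new));
}

int main(void)
{
	atomic_store(&segments, 1);	/* biased count of active stripes */
	inc_active_stripes();		/* a bio was attached to a stripe */
	set_processed_stripes(5);	/* five stripes already handled */

	printf("processed = %u\n", processed_stripes());   /* 5 */
	printf("active    = %u\n", dec_active_stripes());  /* 1 */
	printf("active    = %u\n", dec_active_stripes());  /* 0: bio would complete */
	return 0;
}

Because the count is biased by one (raid5_set_bi_stripes(bi, 1) in remove_bio_from_retry() and the equivalent in make_request()), the bio only completes when the final decrement brings the active half to zero, which is exactly the "remaining == 0" check in the patched make_request() and retry_aligned_read().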