From c4081ba5c9f6f7bdffe49e501a8604a2c0797ef9 Mon Sep 17 00:00:00 2001
From: Richard Kennedy
Date: Mon, 22 Feb 2010 13:49:24 +0100
Subject: cfq: reorder cfq_queue removing padding on 64bit

This removes 8 bytes of padding from struct cfq_queue on 64 bit builds,
shrinking its size to 256 bytes, so it fits into one fewer cacheline and
allows one more object per slab in its kmem_cache.

Signed-off-by: Richard Kennedy
Reviewed-by: Jeff Moyer

----
patch against 2.6.33-rc8
tested on x86_64 AMDX2

Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'block/cfq-iosched.c')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 023f4e69a33..e3dedfd3bcb 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -115,11 +115,11 @@ struct cfq_queue {
 	/* time when queue got scheduled in to dispatch first request. */
 	unsigned long dispatch_start;
 	unsigned int allocated_slice;
+	unsigned int slice_dispatch;
 	/* time when first request from queue completed and slice started. */
 	unsigned long slice_start;
 	unsigned long slice_end;
 	long slice_resid;
-	unsigned int slice_dispatch;
 
 	/* pending metadata requests */
 	int meta_pending;
@@ -130,13 +130,13 @@ struct cfq_queue {
 	unsigned short ioprio, org_ioprio;
 	unsigned short ioprio_class, org_ioprio_class;
 
+	pid_t pid;
+
 	unsigned int seek_samples;
 	u64 seek_total;
 	sector_t seek_mean;
 	sector_t last_request_pos;
 
-	pid_t pid;
-
 	struct cfq_rb_root *service_tree;
 	struct cfq_queue *new_cfqq;
 	struct cfq_group *cfqg;
--
cgit v1.2.3-70-g09d2
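A standalone illustration of the layout effect this patch exploits — not code
from the patch, and the field subset is invented. On an LP64 build an 8-byte
member must be 8-byte aligned, so a lone 4-byte member before it leaves a
4-byte hole; pairing two 4-byte members fills the hole (pahole reports such
holes directly, this sketch just prints the sizes):

#include <stdio.h>

/* Padded layout: 4 bytes of padding follow 'allocated_slice' so that the
 * 8-byte 'slice_start' can be 8-byte aligned, plus 4 bytes of tail padding
 * after 'slice_dispatch'. */
struct padded {
        unsigned int allocated_slice;   /* 4 bytes + 4 padding */
        unsigned long slice_start;      /* 8 bytes, needs 8-byte alignment */
        long slice_resid;               /* 8 bytes */
        unsigned int slice_dispatch;    /* 4 bytes + 4 tail padding */
};

/* Reordered, as the patch does: the two 4-byte fields share one 8-byte slot. */
struct reordered {
        unsigned int allocated_slice;   /* 4 bytes */
        unsigned int slice_dispatch;    /* 4 bytes: fills the hole */
        unsigned long slice_start;      /* 8 bytes */
        long slice_resid;               /* 8 bytes */
};

int main(void)
{
        /* On a typical LP64 build this prints 32 vs 24: the same 8 bytes
         * the patch reclaims from struct cfq_queue. */
        printf("padded: %zu, reordered: %zu\n",
               sizeof(struct padded), sizeof(struct reordered));
        return 0;
}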
From 024f9066165ffe9c8284431c78adcbcddd309831 Mon Sep 17 00:00:00 2001
From: Gui Jianfeng
Date: Fri, 26 Feb 2010 08:56:15 +0100
Subject: cfq: Remove useless css reference get

There's no need to take a css reference here, since the caller has
already called rcu_read_lock() to prevent the cgroup from being
removed.

Signed-off-by: Gui Jianfeng
Reviewed-by: Li Zefan
Acked-by: Vivek Goyal
Signed-off-by: Jens Axboe
---
 block/blk-cgroup.c  | 14 --------------
 block/blk-cgroup.h  |  3 ---
 block/cfq-iosched.c |  5 -----
 3 files changed, 22 deletions(-)

(limited to 'block/cfq-iosched.c')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e7dbbaf5fb3..c85d74cae20 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -23,20 +23,6 @@ static LIST_HEAD(blkio_list);
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
-bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
-{
-	if (!css_tryget(&blkcg->css))
-		return false;
-	return true;
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_tryget);
-
-void blkiocg_css_put(struct blkio_cgroup *blkcg)
-{
-	css_put(&blkcg->css);
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_put);
-
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 {
 	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 4d316df863b..84bf745fa77 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -43,9 +43,6 @@ struct blkio_group {
 	unsigned long sectors;
 };
 
-extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
-extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
-
 typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
 typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
 	unsigned int weight);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e3dedfd3bcb..10eb286f1f4 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -951,10 +951,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
 	unsigned int major, minor;
 
-	/* Do we need to take this reference */
-	if (!blkiocg_css_tryget(blkcg))
-		return NULL;;
-
 	cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
 	if (cfqg || !create)
 		goto done;
@@ -985,7 +981,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 
 done:
-	blkiocg_css_put(blkcg);
 	return cfqg;
 }
--
cgit v1.2.3-70-g09d2
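A minimal userspace sketch of the pattern the patch relies on, using liburcu
(link with -lurcu) in place of kernel RCU; the struct and variable names are
invented, not the kernel's. Anything dereferenced between rcu_read_lock() and
rcu_read_unlock() cannot be freed out from under the reader, so a per-lookup
reference bump like the removed css_tryget()/css_put() pair is redundant:

#include <urcu.h>       /* userspace RCU, standing in for kernel RCU */
#include <stdio.h>

struct group { int weight; };

static struct group *current_group;    /* published pointer, hypothetical */

/* Reader: the group stays valid until rcu_read_unlock(), so no extra
 * reference count is needed around the lookup itself. */
static void reader(void)
{
        struct group *g;

        rcu_read_lock();
        g = rcu_dereference(current_group);
        if (g)
                printf("weight=%d\n", g->weight);
        rcu_read_unlock();
}

int main(void)
{
        struct group g = { .weight = 500 };

        rcu_register_thread();
        rcu_assign_pointer(current_group, &g);
        reader();
        rcu_unregister_thread();
        return 0;
}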
From 3dde36ddea3e07dd025c4c1ba47edec91606fec0 Mon Sep 17 00:00:00 2001
From: Corrado Zoccolo
Date: Sat, 27 Feb 2010 19:45:39 +0100
Subject: cfq-iosched: rework seeky detection

Current seeky detection is based on average seek length. This is
suboptimal, since the average will not distinguish between:
* a process doing medium-sized seeks
* a process doing some sequential requests interleaved with larger seeks
Moreover, even a medium seek can take a lot of time if the requested
sector happens to be behind the disk head in the rotation (50%
probability).

Therefore, we change the seeky queue detection to work as follows:
* each request is classified as sequential if it is very close to the
  current head position, i.e. it is likely in the disk cache (disks
  usually read more data than requested, and put it in cache for
  subsequent reads). Otherwise, the request is classified as seeky.
* a history window of the last 32 requests is kept, storing the
  classification result.
* a queue is marked as seeky if more than 1/8 of the last 32 requests
  were seeky.

This patch fixes a regression reported by Yanmin, on mmap 64k random
reads.

Reported-by: Yanmin Zhang
Signed-off-by: Corrado Zoccolo
Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 54 ++++++++++++++---------------------------------
 1 file changed, 14 insertions(+), 40 deletions(-)

(limited to 'block/cfq-iosched.c')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 10eb286f1f4..3fd8afc2174 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -46,8 +46,8 @@ static const int cfq_hist_divisor = 4;
 #define CFQ_HW_QUEUE_MIN	(5)
 #define CFQ_SERVICE_SHIFT	12
 
-#define CFQQ_SEEK_THR		8 * 1024
-#define CFQQ_SEEKY(cfqq)	((cfqq)->seek_mean > CFQQ_SEEK_THR)
+#define CFQQ_SEEK_THR		(sector_t)(8 * 100)
+#define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
 #define RQ_CIC(rq)		\
 	((struct cfq_io_context *) (rq)->elevator_private)
@@ -132,9 +132,7 @@ struct cfq_queue {
 
 	pid_t pid;
 
-	unsigned int seek_samples;
-	u64 seek_total;
-	sector_t seek_mean;
+	u32 seek_history;
 	sector_t last_request_pos;
 
 	struct cfq_rb_root *service_tree;
@@ -1668,16 +1666,7 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			       struct request *rq, bool for_preempt)
 {
-	sector_t sdist = cfqq->seek_mean;
-
-	if (!sample_valid(cfqq->seek_samples))
-		sdist = CFQQ_SEEK_THR;
-
-	/* if seek_mean is big, using it as close criteria is meaningless */
-	if (sdist > CFQQ_SEEK_THR && !for_preempt)
-		sdist = CFQQ_SEEK_THR;
-
-	return cfq_dist_from_last(cfqd, rq) <= sdist;
+	return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR;
 }
 
 static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
@@ -2975,30 +2964,16 @@ static void
 cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		       struct request *rq)
 {
-	sector_t sdist;
-	u64 total;
-
-	if (!cfqq->last_request_pos)
-		sdist = 0;
-	else if (cfqq->last_request_pos < blk_rq_pos(rq))
-		sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
-	else
-		sdist = cfqq->last_request_pos - blk_rq_pos(rq);
-
-	/*
-	 * Don't allow the seek distance to get too large from the
-	 * odd fragment, pagein, etc
-	 */
-	if (cfqq->seek_samples <= 60)	/* second&third seek */
-		sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*1024);
-	else
-		sdist = min(sdist, (cfqq->seek_mean * 4) + 2*1024*64);
+	sector_t sdist = 0;
+
+	if (cfqq->last_request_pos) {
+		if (cfqq->last_request_pos < blk_rq_pos(rq))
+			sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
+		else
+			sdist = cfqq->last_request_pos - blk_rq_pos(rq);
+	}
 
-	cfqq->seek_samples = (7*cfqq->seek_samples + 256) / 8;
-	cfqq->seek_total = (7*cfqq->seek_total + (u64)256*sdist) / 8;
-	total = cfqq->seek_total + (cfqq->seek_samples/2);
-	do_div(total, cfqq->seek_samples);
-	cfqq->seek_mean = (sector_t)total;
+	cfqq->seek_history <<= 1;
+	cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
 }
 
 /*
@@ -3023,8 +2998,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfq_mark_cfqq_deep(cfqq);
 
 	if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-	    (!cfq_cfqq_deep(cfqq) && sample_valid(cfqq->seek_samples)
-	     && CFQQ_SEEKY(cfqq)))
+	    (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
 		if (cic->ttime_mean > cfqd->cfq_slice_idle)
--
cgit v1.2.3-70-g09d2
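A hedged userspace sketch of the shift-register idea (the names are invented,
not the kernel's): the 32-bit history is a sliding window in which each bit
records one request's seeky/sequential verdict, and a popcount over the window
replaces the old running mean:

#include <stdio.h>
#include <stdint.h>

#define SEEK_THR   (8 * 100)    /* sectors; mirrors CFQQ_SEEK_THR */
#define SEEKY_MAX  (32 / 8)     /* > 4 of the last 32 requests => seeky */

struct queue_state {
        uint32_t seek_history;  /* 1 bit per request, newest in bit 0 */
        uint64_t last_pos;      /* previous request's sector */
};

/* Record one request: shift the window, classify by distance from the
 * previous request (a stand-in for distance from the disk head). */
static void update_seek_history(struct queue_state *q, uint64_t pos)
{
        uint64_t sdist = q->last_pos > pos ? q->last_pos - pos
                                           : pos - q->last_pos;

        q->seek_history <<= 1;
        q->seek_history |= (sdist > SEEK_THR);
        q->last_pos = pos;
}

static int queue_is_seeky(const struct queue_state *q)
{
        /* __builtin_popcount plays the role of the kernel's hweight32() */
        return __builtin_popcount(q->seek_history) > SEEKY_MAX;
}

int main(void)
{
        struct queue_state q = { 0, 0 };

        /* Mostly sequential stream with 4 large jumps out of 32 requests:
         * exactly 1/8 of the window, so not flagged seeky. */
        for (int i = 0; i < 32; i++)
                update_seek_history(&q, q.last_pos + (i % 10 ? 8 : 1000000));
        printf("seeky: %d\n", queue_is_seeky(&q));
        return 0;
}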
From 41647e7a91338dba21773a16af7474ef95e0929e Mon Sep 17 00:00:00 2001
From: Corrado Zoccolo
Date: Sat, 27 Feb 2010 19:45:40 +0100
Subject: cfq-iosched: rethink seeky detection for SSDs

CFQ currently applies the same seeky-queue detection logic, and the
same grouping of seeky queues, to rotational disks and SSDs alike.

For SSDs, the time to complete a request doesn't depend on the request
location, but only on its size. This patch therefore changes the
criterion to group queues by request size in the SSD case, in order to
achieve better fairness.

Signed-off-by: Corrado Zoccolo
Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'block/cfq-iosched.c')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3fd8afc2174..423aee3fd19 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -47,6 +47,7 @@ static const int cfq_hist_divisor = 4;
 #define CFQ_SERVICE_SHIFT	12
 
 #define CFQQ_SEEK_THR		(sector_t)(8 * 100)
+#define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
 #define RQ_CIC(rq)		\
@@ -2965,6 +2966,7 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		       struct request *rq)
 {
 	sector_t sdist = 0;
+	sector_t n_sec = blk_rq_sectors(rq);
 	if (cfqq->last_request_pos) {
 		if (cfqq->last_request_pos < blk_rq_pos(rq))
 			sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
@@ -2973,7 +2975,10 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	}
 
 	cfqq->seek_history <<= 1;
-	cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
+	if (blk_queue_nonrot(cfqd->queue))
+		cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
+	else
+		cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
 }
 
 /*
--
cgit v1.2.3-70-g09d2
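Continuing the userspace sketch above (again with invented names), the SSD
branch swaps the distance test for a size test. Note that CFQQ_SECT_THR_NONROT
is 64 sectors, i.e. 32KiB with 512-byte sectors, so small requests — whose
per-request overhead dominates on an SSD — are the ones that mark a queue:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define SECT_THR_NONROT  (2 * 32)       /* 64 sectors = 32KiB, as in the patch */
#define SEEK_THR         (8 * 100)

/* One-bit verdict for a single request. On a non-rotational device a
 * *small* request marks the queue "seeky" (cost is size-dominated); on a
 * rotational device a *distant* request does (cost is seek-dominated). */
static bool request_is_seeky(bool nonrot, uint64_t sdist, uint32_t n_sectors)
{
        if (nonrot)
                return n_sectors < SECT_THR_NONROT;
        return sdist > SEEK_THR;
}

int main(void)
{
        /* 4KiB (8-sector) request on an SSD: seeky; 256KiB: not. */
        printf("%d %d\n", request_is_seeky(true, 0, 8),
                          request_is_seeky(true, 0, 512));
        return 0;
}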
From 53c583d2269851de9df1c2e992cb2f7f124a5f55 Mon Sep 17 00:00:00 2001
From: Corrado Zoccolo
Date: Sun, 28 Feb 2010 19:45:05 +0100
Subject: cfq-iosched: requests "in flight" vs "in driver" clarification

Counters for requests "in flight" and "in driver" are used
asymmetrically in cfq_may_dispatch, and have slightly different
meanings. We split the rq_in_flight counter (formerly sync_flight) to
count both sync and async requests, in order to use this one, which is
more accurate in some corner cases. The rq_in_driver counter is
coalesced, since individual sync/async counts are no longer used.

Signed-off-by: Corrado Zoccolo
Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 44 ++++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

(limited to 'block/cfq-iosched.c')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 423aee3fd19..f27e535ce26 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -222,8 +222,8 @@ struct cfq_data {
 
 	unsigned int busy_queues;
 
-	int rq_in_driver[2];
-	int sync_flight;
+	int rq_in_driver;
+	int rq_in_flight[2];
 
 	/*
 	 * queue-depth detection
@@ -416,11 +416,6 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
 					     struct io_context *);
 
-static inline int rq_in_driver(struct cfq_data *cfqd)
-{
-	return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
-}
-
 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
 					    bool is_sync)
 {
@@ -1414,9 +1409,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	cfqd->rq_in_driver[rq_is_sync(rq)]++;
+	cfqd->rq_in_driver++;
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
-						rq_in_driver(cfqd));
+						cfqd->rq_in_driver);
 
 	cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
 }
@@ -1424,12 +1419,11 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	const int sync = rq_is_sync(rq);
 
-	WARN_ON(!cfqd->rq_in_driver[sync]);
-	cfqd->rq_in_driver[sync]--;
+	WARN_ON(!cfqd->rq_in_driver);
+	cfqd->rq_in_driver--;
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
-						rq_in_driver(cfqd));
+						cfqd->rq_in_driver);
 }
 
 static void cfq_remove_request(struct request *rq)
@@ -1863,8 +1857,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 	cfqq->dispatched++;
 	elv_dispatch_sort(q, rq);
 
-	if (cfq_cfqq_sync(cfqq))
-		cfqd->sync_flight++;
+	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
 	cfqq->nr_sectors += blk_rq_sectors(rq);
 }
 
@@ -2211,13 +2204,13 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	/*
 	 * Drain async requests before we start sync IO
 	 */
-	if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+	if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
 		return false;
 
 	/*
 	 * If this is an async queue and we have sync IO in flight, let it wait
 	 */
-	if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
+	if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
 		return false;
 
 	max_dispatch = cfqd->cfq_quantum;
@@ -3189,14 +3182,14 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq = cfqd->active_queue;
 
-	if (rq_in_driver(cfqd) > cfqd->hw_tag_est_depth)
-		cfqd->hw_tag_est_depth = rq_in_driver(cfqd);
+	if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
+		cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
 
 	if (cfqd->hw_tag == 1)
 		return;
 
 	if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
-	    rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
+	    cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
 		return;
 
 	/*
@@ -3206,7 +3199,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
 	 */
 	if (cfqq && cfq_cfqq_idle_window(cfqq) &&
 	    cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
-	    CFQ_HW_QUEUE_MIN && rq_in_driver(cfqd) < CFQ_HW_QUEUE_MIN)
+	    CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
 		return;
 
 	if (cfqd->hw_tag_samples++ < 50)
@@ -3259,13 +3252,12 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 
 	cfq_update_hw_tag(cfqd);
 
-	WARN_ON(!cfqd->rq_in_driver[sync]);
+	WARN_ON(!cfqd->rq_in_driver);
 	WARN_ON(!cfqq->dispatched);
-	cfqd->rq_in_driver[sync]--;
+	cfqd->rq_in_driver--;
 	cfqq->dispatched--;
 
-	if (cfq_cfqq_sync(cfqq))
-		cfqd->sync_flight--;
+	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
 	if (sync) {
 		RQ_CIC(rq)->last_end_request = now;
@@ -3319,7 +3311,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 		}
 	}
 
-	if (!rq_in_driver(cfqd))
+	if (!cfqd->rq_in_driver)
 		cfq_schedule_dispatch(cfqd);
 }
--
cgit v1.2.3-70-g09d2
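A toy rendering of the two counters after this patch (invented names; in CFQ
the counters are also updated at slightly different points — activate and
deactivate for rq_in_driver, dispatch and completion for rq_in_flight — which
this sketch collapses into one pair of functions). rq_in_driver becomes a
plain total, while rq_in_flight stays split because cfq_may_dispatch() needs
the sync and async counts individually:

#include <stdio.h>
#include <assert.h>
#include <stdbool.h>

enum { RW_ASYNC = 0, RW_SYNC = 1 };     /* mirrors BLK_RW_ASYNC/BLK_RW_SYNC */

struct sched_state {
        int rq_in_driver;       /* all requests handed to the driver */
        int rq_in_flight[2];    /* dispatched, not completed, by direction */
};

static void dispatch(struct sched_state *s, bool sync)
{
        s->rq_in_driver++;
        s->rq_in_flight[sync]++;
}

static void complete(struct sched_state *s, bool sync)
{
        assert(s->rq_in_driver > 0);
        s->rq_in_driver--;
        s->rq_in_flight[sync]--;
}

/* The two asymmetric checks from cfq_may_dispatch(), in miniature: */
static bool may_dispatch(const struct sched_state *s, bool queue_is_sync,
                         bool should_idle)
{
        /* drain async writes before starting sync IO */
        if (should_idle && s->rq_in_flight[RW_ASYNC])
                return false;
        /* async queues wait while sync IO is in flight */
        if (s->rq_in_flight[RW_SYNC] && !queue_is_sync)
                return false;
        return true;
}

int main(void)
{
        struct sched_state s = { 0, { 0, 0 } };

        dispatch(&s, false);                            /* async in flight */
        printf("%d\n", may_dispatch(&s, true, true));   /* 0: must drain   */
        complete(&s, false);
        printf("%d\n", may_dispatch(&s, true, true));   /* 1: drained      */
        return 0;
}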
From abc3c744d0d7f4ad710a948ae73852ffea5fbc3b Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Mon, 1 Mar 2010 09:20:54 +0100
Subject: cfq-iosched: quantum check tweak

Currently a queue can only dispatch up to 4 requests if there are other
queues. This isn't optimal: the device can handle more requests, for
example AHCI can handle 31 requests. I can understand the limit is for
fairness, but we could do a tweak: if the queue still has a lot of its
slice left, it seems we could ignore the limit. Testing shows this
boosts my workload (two-thread randread of an SSD) from 78MB/s to
100MB/s. Thanks to Corrado and Vivek for their suggestions on the
patch.

Signed-off-by: Shaohua Li
Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

(limited to 'block/cfq-iosched.c')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index f27e535ce26..0db07d7771b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -19,7 +19,7 @@
  * tunables
  */
 /* max queue in one round of service */
-static const int cfq_quantum = 4;
+static const int cfq_quantum = 8;
 static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
 /* maximum backwards seek, in KiB */
 static const int cfq_back_max = 16 * 1024;
@@ -2197,6 +2197,19 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 	return dispatched;
 }
 
+static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
+	struct cfq_queue *cfqq)
+{
+	/* the queue hasn't finished any request, can't estimate */
+	if (cfq_cfqq_slice_new(cfqq))
+		return 1;
+	if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
+		cfqq->slice_end))
+		return 1;
+
+	return 0;
+}
+
 static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	unsigned int max_dispatch;
@@ -2213,7 +2226,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
 		return false;
 
-	max_dispatch = cfqd->cfq_quantum;
+	max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
 	if (cfq_class_idle(cfqq))
 		max_dispatch = 1;
 
@@ -2230,13 +2243,22 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 		/*
 		 * We have other queues, don't allow more IO from this one
 		 */
-		if (cfqd->busy_queues > 1)
+		if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq))
 			return false;
 
 		/*
 		 * Sole queue user, no limit
 		 */
-		max_dispatch = -1;
+		if (cfqd->busy_queues == 1)
+			max_dispatch = -1;
+		else
+			/*
+			 * Normally we start throttling cfqq when cfq_quantum/2
+			 * requests have been dispatched. But we can drive
+			 * deeper queue depths at the beginning of slice
+			 * subjected to upper limit of cfq_quantum.
+			 */
+			max_dispatch = cfqd->cfq_quantum;
 	}
 
 	/*
--
cgit v1.2.3-70-g09d2
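A userspace rendering of the patch's heuristic (field names invented): if the
requests already dispatched, at roughly slice_idle ticks each, would run past
the end of the queue's time slice, the slice is "used soon" and the throttle
at cfq_quantum/2 applies; otherwise the queue may ramp up to the full
cfq_quantum early in its slice:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct queue_view {
        bool slice_new;          /* no request completed yet in this slice */
        unsigned int dispatched; /* requests outstanding from this queue */
        uint64_t slice_end;      /* slice deadline, in ticks */
};

static bool slice_used_soon(const struct queue_view *q, uint64_t now,
                            uint64_t slice_idle)
{
        if (q->slice_new)
                return true;    /* nothing completed yet: can't estimate */
        return now + slice_idle * q->dispatched > q->slice_end;
}

int main(void)
{
        struct queue_view q = { false, 4, 100 };

        /* 4 outstanding requests at ~8 ticks each fit in the slice... */
        printf("%d\n", slice_used_soon(&q, 0, 8));      /* 0: may go deep */
        q.dispatched = 20;
        /* ...but 20 would overrun it, so the dispatch limit kicks in. */
        printf("%d\n", slice_used_soon(&q, 0, 8));      /* 1: throttle    */
        return 0;
}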
From 73e9ffdd0cc8159f876d5e29ecf2d9c1bfca544f Mon Sep 17 00:00:00 2001
From: Richard Kennedy
Date: Mon, 1 Mar 2010 10:50:20 +0100
Subject: cfq: remove 8 bytes of padding from cfq_rb_root on 64 bit builds

Reorder cfq_rb_root to remove 8 bytes of padding on 64 bit builds. This
consequently removes 56 bytes from cfq_group and 64 bytes from cfq_data.

Signed-off-by: Richard Kennedy
Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'block/cfq-iosched.c')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 0db07d7771b..dee9d9378fe 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -78,11 +78,12 @@ struct cfq_rb_root {
 	struct rb_root rb;
 	struct rb_node *left;
 	unsigned count;
+	unsigned total_weight;
 	u64 min_vdisktime;
 	struct rb_node *active;
-	unsigned total_weight;
 };
-#define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
+#define CFQ_RB_ROOT	(struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
+			.count = 0, .min_vdisktime = 0, }
 
 /*
  * Per process-grouping structure
--
cgit v1.2.3-70-g09d2
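A side effect worth noting: the patch also converts CFQ_RB_ROOT from
positional to designated initializers, which is what makes this kind of
reordering safe. A standalone sketch (stand-in types, invented names — not
the kernel's) showing why:

#include <stdio.h>

struct rb_root_demo { void *node; };    /* stand-in for struct rb_root */

struct cfq_rb_root_demo {
        struct rb_root_demo rb;
        void *left;
        unsigned count;
        unsigned total_weight;          /* moved up, as in the patch */
        unsigned long long min_vdisktime;
        void *active;
};

/* Positional initializers like { RB_ROOT, NULL, 0, 0, } silently change
 * meaning when fields move; designated initializers name each field, so
 * they stay correct under any reordering (unnamed fields are zeroed). */
#define CFQ_RB_ROOT_DEMO (struct cfq_rb_root_demo) { .rb = { NULL }, \
                .left = NULL, .count = 0, .min_vdisktime = 0, }

int main(void)
{
        struct cfq_rb_root_demo root = CFQ_RB_ROOT_DEMO;

        printf("count=%u total_weight=%u\n", root.count, root.total_weight);
        return 0;
}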