From 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:29 +0900 Subject: percpu_counter: add @gfp to percpu_counter_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_counter_init() so that !GFP_KERNEL allocation masks can be used with percpu_counters too. We could have left percpu_counter_init() alone and added percpu_counter_init_gfp(); however, the number of users isn't that high and introducing _gfp variants to all percpu data structures would be quite ugly, so let's just do the conversion. This is the one with the most users. Other percpu data structures are a lot easier to convert. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Jan Kara Acked-by: "David S. Miller" Cc: x86@kernel.org Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andrew Morton --- mm/backing-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f..f19a818be2d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -455,7 +455,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { - err = percpu_counter_init(&bdi->bdi_stat[i], 0); + err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL); if (err) goto err; } -- cgit v1.2.3-70-g09d2 From 20ae00792c6f1f18fc4fc5965445a145df92827e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: proportions: add @gfp to init functions Percpu allocator now supports allocation mask. Add @gfp to [flex_]proportions init functions so that !GFP_KERNEL allocation masks can be used with them too. This patch doesn't make any functional difference. 
Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Peter Zijlstra --- include/linux/flex_proportions.h | 5 +++-- include/linux/proportions.h | 5 +++-- lib/flex_proportions.c | 8 ++++---- lib/proportions.c | 10 +++++----- mm/backing-dev.c | 2 +- mm/page-writeback.c | 2 +- 6 files changed, 17 insertions(+), 15 deletions(-) (limited to 'mm/backing-dev.c') diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 4ebc49fae39..0d348e011a6 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * When maximum proportion of some event type is specified, this is the @@ -32,7 +33,7 @@ struct fprop_global { seqcount_t sequence; }; -int fprop_global_init(struct fprop_global *p); +int fprop_global_init(struct fprop_global *p, gfp_t gfp); void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); @@ -79,7 +80,7 @@ struct fprop_local_percpu { raw_spinlock_t lock; /* Protect period and numerator */ }; -int fprop_local_init_percpu(struct fprop_local_percpu *pl); +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 26a8a4ed9b0..00e8e8fa735 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -12,6 +12,7 @@ #include #include #include +#include struct prop_global { /* @@ -40,7 +41,7 @@ struct prop_descriptor { struct mutex mutex; /* serialize the prop_global switch */ }; -int prop_descriptor_init(struct prop_descriptor *pd, int shift); +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp); void prop_change_shift(struct prop_descriptor *pd, int new_shift); 
/* @@ -61,7 +62,7 @@ struct prop_local_percpu { raw_spinlock_t lock; /* protect the snapshot state */ }; -int prop_local_init_percpu(struct prop_local_percpu *pl); +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp); void prop_local_destroy_percpu(struct prop_local_percpu *pl); void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index b9d026bfcf3..8f25652f40d 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -34,13 +34,13 @@ */ #include -int fprop_global_init(struct fprop_global *p) +int fprop_global_init(struct fprop_global *p, gfp_t gfp) { int err; p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... */ - err = percpu_counter_init(&p->events, 1, GFP_KERNEL); + err = percpu_counter_init(&p->events, 1, gfp); if (err) return err; seqcount_init(&p->sequence); @@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p, */ #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int fprop_local_init_percpu(struct fprop_local_percpu *pl) +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp) { int err; - err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); + err = percpu_counter_init(&pl->events, 0, gfp); if (err) return err; pl->period = 0; diff --git a/lib/proportions.c b/lib/proportions.c index ca95f8d5438..6f724298f67 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,7 +73,7 @@ #include #include -int prop_descriptor_init(struct prop_descriptor *pd, int shift) +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp) { int err; @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[0].events, 0, gfp); if 
(err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[1].events, 0, gfp); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int prop_local_init_percpu(struct prop_local_percpu *pl) +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp) { raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0, GFP_KERNEL); + return percpu_counter_init(&pl->events, 0, gfp); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f19a818be2d..64ec49d1772 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -470,7 +470,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->write_bandwidth = INIT_BW; bdi->avg_write_bandwidth = INIT_BW; - err = fprop_local_init_percpu(&bdi->completions); + err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL); if (err) { err: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 91d73ef1744..50859940372 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1777,7 +1777,7 @@ void __init page_writeback_init(void) writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); - fprop_global_init(&writeout_completions); + fprop_global_init(&writeout_completions, GFP_KERNEL); } /** -- cgit v1.2.3-70-g09d2 From b68757341d8015d28e261990deea58dd836e04da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:58 +0900 Subject: bdi: remove bdi->wb_lock locking around bdi->dev clearing in bdi_unregister() The only places where NULL test on bdi->dev is used are bdi_[un]register(). The functions can't be called in parallel anyway and there's no point in protecting bdi->dev clearing with a lock. 
Remove bdi->wb_lock grabbing around bdi->dev clearing and move it after device_unregister() call so that bdi->dev doesn't have to be cached in a local variable. This patch shouldn't introduce any behavior difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- mm/backing-dev.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f..4afeefe9e36 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -402,21 +402,15 @@ static void bdi_prune_sb(struct backing_dev_info *bdi) void bdi_unregister(struct backing_dev_info *bdi) { - struct device *dev = bdi->dev; - - if (dev) { + if (bdi->dev) { bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); bdi_wb_shutdown(bdi); bdi_debug_unregister(bdi); - - spin_lock_bh(&bdi->wb_lock); + device_unregister(bdi->dev); bdi->dev = NULL; - spin_unlock_bh(&bdi->wb_lock); - - device_unregister(dev); } } EXPORT_SYMBOL(bdi_unregister); -- cgit v1.2.3-70-g09d2 From c0ea1c22bce63a27b47da90ad1ac49ce48e1a8aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:59 +0900 Subject: bdi: make backing_dev_info->wb.dwork canceling stricter Canceling of bdi->wb.dwork is currently a bit mushy. bdi_wb_shutdown() performs cancel_delayed_work_sync() at the end after shutting down and flushing the delayed_work and bdi_destroy() tries yet again after bdi_unregister(). bdi->wb.dwork is queued only after checking BDI_registered while holding bdi->wb_lock and bdi_wb_shutdown() clears the flag while holding the same lock and then flushes the delayed_work. There's no way the delayed_work can be queued again after that. Replace the two unnecessary cancel_delayed_work_sync() invocations with WARNs on pending. This simplifies and clarifies the code a bit and will help future changes in further isolating bdi_writeback handling. 
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- mm/backing-dev.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4afeefe9e36..cb7c5e32381 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -376,13 +376,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); flush_delayed_work(&bdi->wb.dwork); WARN_ON(!list_empty(&bdi->work_list)); - - /* - * This shouldn't be necessary unless @bdi for some reason has - * unflushed dirty IO after work_list is drained. Do it anyway - * just in case. - */ - cancel_delayed_work_sync(&bdi->wb.dwork); + WARN_ON(delayed_work_pending(&bdi->wb.dwork)); } /* @@ -497,12 +491,7 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); - /* - * If bdi_unregister() had already been called earlier, the dwork - * could still be pending because bdi_prune_sb() can race with the - * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty(). - */ - cancel_delayed_work_sync(&bdi->wb.dwork); + WARN_ON(delayed_work_pending(&bdi->wb.dwork)); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); -- cgit v1.2.3-70-g09d2 From 1a1e4530eacca37e85a4d66a164273c7dba9110c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:04:00 +0900 Subject: bdi: explain the dirty list transferring in bdi_destroy() bdi_destroy() has code to transfer the remaining dirty inodes to the default_backing_dev_info; however, given the shutdown sequence, it isn't clear how such condition would happen. Also, it isn't a full solution as the transferred inodes still point to the bdi which is being destroyed. Operations on those inodes can end up accessing already released fields such as the percpu stat fields. Digging through the history, it seems that the code was added as a quick workaround for a bug report without fully root-causing the issue. 
We probably want to remove the code in time but for now let's add a comment noting that it is a quick workaround. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- mm/backing-dev.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'mm/backing-dev.c') diff --git a/mm/backing-dev.c b/mm/backing-dev.c index cb7c5e32381..b65fe93ad61 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -475,8 +475,17 @@ void bdi_destroy(struct backing_dev_info *bdi) int i; /* - * Splice our entries to the default_backing_dev_info, if this - * bdi disappears + * Splice our entries to the default_backing_dev_info. This + * condition shouldn't happen. @wb must be empty at this point and + * dirty inodes on it might cause other issues. This workaround is + * added by ce5f8e779519 ("writeback: splice dirty inode entries to + * default bdi on bdi_destroy()") without root-causing the issue. + * + * http://lkml.kernel.org/g/1253038617-30204-11-git-send-email-jens.axboe@oracle.com + * http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350 + * + * We should probably add WARN_ON() to find out whether it still + * happens and track it down if so. */ if (bdi_has_dirty_io(bdi)) { struct bdi_writeback *dst = &default_backing_dev_info.wb; -- cgit v1.2.3-70-g09d2 From 018a17bdc8658ad448497c84d4ba21b6985820ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:04:01 +0900 Subject: bdi: reimplement bdev_inode_switch_bdi() A block_device may be attached to different gendisks and thus different bdis over time. bdev_inode_switch_bdi() is used to switch the associated bdi. The function assumes that the inode could be dirty and transfers it between bdis if so. This is a bit nasty in that it reaches into bdi internals. This patch reimplements the function so that it writes out the inode if dirty. This is a lot simpler and can be implemented without exposing bdi internals. 
Signed-off-by: Tejun Heo Cc: Alexander Viro Signed-off-by: Jens Axboe --- fs/block_dev.c | 32 +++++++++++--------------------- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 2 +- 3 files changed, 12 insertions(+), 23 deletions(-) (limited to 'mm/backing-dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index d3251eca642..cc8d68ac29a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,32 +50,22 @@ inline struct block_device *I_BDEV(struct inode *inode) EXPORT_SYMBOL(I_BDEV); /* - * Move the inode from its current bdi to a new bdi. If the inode is dirty we - * need to move it onto the dirty list of @dst so that the inode is always on - * the right list. + * Move the inode from its current bdi to a new bdi. Make sure the inode + * is clean before moving so that it doesn't linger on the old bdi. */ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - struct backing_dev_info *old = inode->i_data.backing_dev_info; - bool wakeup_bdi = false; - - if (unlikely(dst == old)) /* deadlock avoidance */ - return; - bdi_lock_two(&old->wb, &dst->wb); - spin_lock(&inode->i_lock); - inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) { - if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) - wakeup_bdi = true; - list_move(&inode->i_wb_list, &dst->wb.b_dirty); + while (true) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_DIRTY)) { + inode->i_data.backing_dev_info = dst; + spin_unlock(&inode->i_lock); + return; + } + spin_unlock(&inode->i_lock); + WARN_ON_ONCE(write_inode_now(inode, true)); } - spin_unlock(&inode->i_lock); - spin_unlock(&old->wb.list_lock); - spin_unlock(&dst->wb.list_lock); - - if (wakeup_bdi) - bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. 
*/ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2103a7fa3fd..5da6012b7a1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -121,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b65fe93ad61..7d63d5e9d3d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -40,7 +40,7 @@ LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ struct workqueue_struct *bdi_wq; -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) +static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { spin_lock(&wb1->list_lock); -- cgit v1.2.3-70-g09d2 From 5705465174686d007473e017b76c4b64b44aa690 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 9 Oct 2014 15:28:17 -0700 Subject: mm: clean up zone flags Page reclaim tests zone_is_reclaim_dirty(), but the site that actually sets this state does zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY), sending the reader through layers of indirection just to track down a simple bit. Remove all zone flag wrappers and just use bitops against zone->flags directly. It's just as readable and the lines are barely any longer. Also rename ZONE_TAIL_LRU_DIRTY to ZONE_DIRTY to match ZONE_WRITEBACK, and remove the zone_flags_t typedef. 
Signed-off-by: Johannes Weiner Acked-by: David Rientjes Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 51 +++----------------------------------------------- mm/backing-dev.c | 2 +- mm/oom_kill.c | 6 +++--- mm/page_alloc.c | 8 ++++---- mm/vmscan.c | 28 +++++++++++++-------------- 5 files changed, 25 insertions(+), 70 deletions(-) (limited to 'mm/backing-dev.c') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 318df705185..48bf12ef662 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -521,13 +521,13 @@ struct zone { atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; -typedef enum { +enum zone_flags { ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */ ZONE_CONGESTED, /* zone has many dirty pages backed by * a congested BDI */ - ZONE_TAIL_LRU_DIRTY, /* reclaim scanning has recently found + ZONE_DIRTY, /* reclaim scanning has recently found * many dirty file pages at the tail * of the LRU. 
*/ @@ -535,52 +535,7 @@ typedef enum { * many pages under writeback */ ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ -} zone_flags_t; - -static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) -{ - set_bit(flag, &zone->flags); -} - -static inline int zone_test_and_set_flag(struct zone *zone, zone_flags_t flag) -{ - return test_and_set_bit(flag, &zone->flags); -} - -static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag) -{ - clear_bit(flag, &zone->flags); -} - -static inline int zone_is_reclaim_congested(const struct zone *zone) -{ - return test_bit(ZONE_CONGESTED, &zone->flags); -} - -static inline int zone_is_reclaim_dirty(const struct zone *zone) -{ - return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags); -} - -static inline int zone_is_reclaim_writeback(const struct zone *zone) -{ - return test_bit(ZONE_WRITEBACK, &zone->flags); -} - -static inline int zone_is_reclaim_locked(const struct zone *zone) -{ - return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); -} - -static inline int zone_is_fair_depleted(const struct zone *zone) -{ - return test_bit(ZONE_FAIR_DEPLETED, &zone->flags); -} - -static inline int zone_is_oom_locked(const struct zone *zone) -{ - return test_bit(ZONE_OOM_LOCKED, &zone->flags); -} +}; static inline unsigned long zone_end_pfn(const struct zone *zone) { diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f..b27714f1b40 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -631,7 +631,7 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout) * of sleeping on the congestion queue */ if (atomic_read(&nr_bdi_congested[sync]) == 0 || - !zone_is_reclaim_congested(zone)) { + !test_bit(ZONE_CONGESTED, &zone->flags)) { cond_resched(); /* In case we scheduled, work out time remaining */ diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 1e11df8fa7e..bbf405a3a18 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -565,7 +565,7 @@ bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t 
gfp_mask) spin_lock(&zone_scan_lock); for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) - if (zone_is_oom_locked(zone)) { + if (test_bit(ZONE_OOM_LOCKED, &zone->flags)) { ret = false; goto out; } @@ -575,7 +575,7 @@ bool oom_zonelist_trylock(struct zonelist *zonelist, gfp_t gfp_mask) * call to oom_zonelist_trylock() doesn't succeed when it shouldn't. */ for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) - zone_set_flag(zone, ZONE_OOM_LOCKED); + set_bit(ZONE_OOM_LOCKED, &zone->flags); out: spin_unlock(&zone_scan_lock); @@ -594,7 +594,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask) spin_lock(&zone_scan_lock); for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) - zone_clear_flag(zone, ZONE_OOM_LOCKED); + clear_bit(ZONE_OOM_LOCKED, &zone->flags); spin_unlock(&zone_scan_lock); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ae2f8474273..f3769f0fce3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1614,8 +1614,8 @@ again: __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 && - !zone_is_fair_depleted(zone)) - zone_set_flag(zone, ZONE_FAIR_DEPLETED); + !test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) + set_bit(ZONE_FAIR_DEPLETED, &zone->flags); __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); @@ -1935,7 +1935,7 @@ static void reset_alloc_batches(struct zone *preferred_zone) mod_zone_page_state(zone, NR_ALLOC_BATCH, high_wmark_pages(zone) - low_wmark_pages(zone) - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - zone_clear_flag(zone, ZONE_FAIR_DEPLETED); + clear_bit(ZONE_FAIR_DEPLETED, &zone->flags); } while (zone++ != preferred_zone); } @@ -1986,7 +1986,7 @@ zonelist_scan: if (alloc_flags & ALLOC_FAIR) { if (!zone_local(preferred_zone, zone)) break; - if (zone_is_fair_depleted(zone)) { + if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) { nr_fair_skipped++; continue; } diff --git 
a/mm/vmscan.c b/mm/vmscan.c index af72fe8e8d7..06123f20a32 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -920,7 +920,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* Case 1 above */ if (current_is_kswapd() && PageReclaim(page) && - zone_is_reclaim_writeback(zone)) { + test_bit(ZONE_WRITEBACK, &zone->flags)) { nr_immediate++; goto keep_locked; @@ -1002,7 +1002,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ if (page_is_file_cache(page) && (!current_is_kswapd() || - !zone_is_reclaim_dirty(zone))) { + !test_bit(ZONE_DIRTY, &zone->flags))) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -1563,7 +1563,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * are encountered in the nr_immediate check below. */ if (nr_writeback && nr_writeback == nr_taken) - zone_set_flag(zone, ZONE_WRITEBACK); + set_bit(ZONE_WRITEBACK, &zone->flags); /* * memcg will stall in page writeback so only consider forcibly @@ -1575,16 +1575,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * backed by a congested BDI and wait_iff_congested will stall. */ if (nr_dirty && nr_dirty == nr_congested) - zone_set_flag(zone, ZONE_CONGESTED); + set_bit(ZONE_CONGESTED, &zone->flags); /* * If dirty pages are scanned that are not queued for IO, it * implies that flushers are not keeping up. In this case, flag - * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing - * pages from reclaim context. + * the zone ZONE_DIRTY and kswapd will start writing pages from + * reclaim context. 
*/ if (nr_unqueued_dirty == nr_taken) - zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY); + set_bit(ZONE_DIRTY, &zone->flags); /* * If kswapd scans pages marked marked for immediate @@ -2984,7 +2984,7 @@ static bool kswapd_shrink_zone(struct zone *zone, /* Account for the number of pages attempted to reclaim */ *nr_attempted += sc->nr_to_reclaim; - zone_clear_flag(zone, ZONE_WRITEBACK); + clear_bit(ZONE_WRITEBACK, &zone->flags); /* * If a zone reaches its high watermark, consider it to be no longer @@ -2994,8 +2994,8 @@ static bool kswapd_shrink_zone(struct zone *zone, */ if (zone_reclaimable(zone) && zone_balanced(zone, testorder, 0, classzone_idx)) { - zone_clear_flag(zone, ZONE_CONGESTED); - zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); + clear_bit(ZONE_CONGESTED, &zone->flags); + clear_bit(ZONE_DIRTY, &zone->flags); } return sc->nr_scanned >= sc->nr_to_reclaim; @@ -3086,8 +3086,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, * If balanced, clear the dirty and congested * flags */ - zone_clear_flag(zone, ZONE_CONGESTED); - zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); + clear_bit(ZONE_CONGESTED, &zone->flags); + clear_bit(ZONE_DIRTY, &zone->flags); } } @@ -3714,11 +3714,11 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) if (node_state(node_id, N_CPU) && node_id != numa_node_id()) return ZONE_RECLAIM_NOSCAN; - if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED)) + if (test_and_set_bit(ZONE_RECLAIM_LOCKED, &zone->flags)) return ZONE_RECLAIM_NOSCAN; ret = __zone_reclaim(zone, gfp_mask, order); - zone_clear_flag(zone, ZONE_RECLAIM_LOCKED); + clear_bit(ZONE_RECLAIM_LOCKED, &zone->flags); if (!ret) count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED); -- cgit v1.2.3-70-g09d2