summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.c129
-rw-r--r--fs/btrfs/ioctl.c20
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/mbcache.c5
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/ubifs/shrinker.c2
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c130
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h3
-rw-r--r--fs/xfs/quota/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_mount.h2
20 files changed, 229 insertions, 97 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe..c3df14ce2cc 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
return ret;
}
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right. We'll push up to and including min_slot, but no lower
+ */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
int data_size, int empty,
struct extent_buffer *right,
- int free_space, u32 left_nritems)
+ int free_space, u32 left_nritems,
+ u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
if (empty)
nr = 0;
else
- nr = 1;
+ nr = max_t(u32, 1, min_slot);
if (path->slots[0] >= left_nritems)
push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
*
* returns 1 if the push failed because the other node didn't have enough
* room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf. It won't
+ * push any slot lower than min_slot
*/
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path,
+ int min_data_size, int data_size,
+ int empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
- return __push_leaf_right(trans, root, path, data_size, empty,
- right, free_space, left_nritems);
+ return __push_leaf_right(trans, root, path, min_data_size, empty,
+ right, free_space, left_nritems, min_slot);
out_unlock:
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
+ * items
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
- int free_space, int right_nritems)
+ int free_space, u32 right_nritems,
+ u32 max_slot)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
slot = path->slots[1];
if (empty)
- nr = right_nritems;
+ nr = min(right_nritems, max_slot);
else
- nr = right_nritems - 1;
+ nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
/*
* push some data in the path leaf to the left, trying to free up at
* least data_size bytes. returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items. The
+ * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
+ * items
*/
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int data_size,
- int empty)
+ *root, struct btrfs_path *path, int min_data_size,
+ int data_size, int empty, u32 max_slot)
{
struct extent_buffer *right = path->nodes[0];
struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
- return __push_leaf_left(trans, root, path, data_size,
- empty, left, free_space, right_nritems);
+ return __push_leaf_left(trans, root, path, min_data_size,
+ empty, left, free_space, right_nritems,
+ max_slot);
out:
btrfs_tree_unlock(left);
free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf. A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ * A B C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves. If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ int data_size)
+{
+ int ret;
+ int progress = 0;
+ int slot;
+ u32 nritems;
+
+ slot = path->slots[0];
+
+ /*
+ * try to push all the items after our slot into the
+ * right leaf
+ */
+ ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ /*
+ * our goal is to get our slot at the start or end of a leaf. If
+ * we've done so we're done
+ */
+ if (path->slots[0] == 0 || path->slots[0] == nritems)
+ return 0;
+
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+
+ /* try to push all the items before our slot into the next leaf */
+ slot = path->slots[0];
+ ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0)
+ progress++;
+
+ if (progress)
+ return 0;
+ return 1;
+}
+
+/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
int wret;
int split;
int num_doubles = 0;
+ int tried_avoid_double = 0;
l = path->nodes[0];
slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
- wret = push_leaf_right(trans, root, path, data_size, 0);
+ if (data_size) {
+ wret = push_leaf_right(trans, root, path, data_size,
+ data_size, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size, 0);
+ wret = push_leaf_left(trans, root, path, data_size,
+ data_size, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -2923,6 +3003,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2;
}
}
@@ -2939,6 +3021,8 @@ again:
if (mid != nritems &&
leaf_space_used(l, mid, nritems - mid) +
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+ if (data_size && !tried_avoid_double)
+ goto push_for_double;
split = 2 ;
}
}
@@ -3019,6 +3103,13 @@ again:
}
return ret;
+
+push_for_double:
+ push_for_double_split(trans, root, path, data_size);
+ tried_avoid_double = 1;
+ if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+ return 0;
+ goto again;
}
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
extent_buffer_get(leaf);
btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1);
+ wret = push_leaf_left(trans, root, path, 1, 1,
+ 1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
if (path->nodes[0] == leaf &&
btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1, 1);
+ wret = push_leaf_right(trans, root, path, 1,
+ 1, 1, 0);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4dbaf89b133..9254b3d58db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
*/
/* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE))
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
/* determine range to clone */
ret = -EINVAL;
- if (off >= src->i_size || off + len > src->i_size)
+ if (off + len > src->i_size || off + len < off)
goto out_unlock;
if (len == 0)
olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
u64 disko = 0, diskl = 0;
u64 datao = 0, datal = 0;
u8 comp;
+ u64 endoff;
size = btrfs_item_size_nr(leaf, slot);
read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_release_path(root, path);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- if (new_key.offset + datal > inode->i_size)
- btrfs_i_size_write(inode,
- new_key.offset + datal);
+
+ /*
+ * we round up to the block size at eof when
+ * determining which extents to clone above,
+ * but shouldn't round up the file size
+ */
+ endoff = new_key.offset + datal;
+ if (endoff > off+olen)
+ endoff = off+olen;
+ if (endoff > inode->i_size)
+ btrfs_i_size_write(inode, endoff);
+
BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
diff --git a/fs/dcache.c b/fs/dcache.c
index c8c78ba0782..86d4db15473 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*
* In this case we return -1 to tell the caller that we baled.
*/
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
if (!(gfp_mask & __GFP_FS))
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dbab3fdc258..0898f3ec821 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1358,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
}
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_glock *gl;
int may_demote;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b256d6f2428..8f02d3db8f4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(qd_lru_lock);
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
struct gfs2_quota_data *qd;
struct gfs2_sbd *sdp;
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd1..e7d236ca48b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
extern const struct quotactl_ops gfs2_quotactl_ops;
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d6..722860b323a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
* This function is passed the number of inodes to scan, and it returns the
* total number of remaining possibly-reclaimable inodes.
*/
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
/*
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a..e28f21b9534 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
* What the mbcache registers as to get shrunk dynamically.
*/
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
static struct shrinker mb_cache_shrinker = {
.shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
* This function is called by the kernel memory management when memory
* gets low.
*
+ * @shrink: (ignored)
* @nr_to_scan: Number of objects to scan
* @gfp_mask: (ignored)
*
* Returns the number of objects which are present in the cache.
*/
static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(free_list);
struct list_head *l, *ltmp;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91..e60416d3f81 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(head);
struct nfs_inode *nfsi;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386..e70f44b9b3f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
/* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+ int nr_to_scan, gfp_t gfp_mask);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6..437d2ca2de9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
* This is called from kswapd when we think we need some
* more memory
*/
-static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
if (nr) {
spin_lock(&dq_list_lock);
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefc..0b201114a5a 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
return 0;
}
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
int freed, contention = 0;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c..04310878f44 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int ubifs_tnc_end_commit(struct ubifs_info *c);
/* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
/* commit.c */
int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef59..2ee3f7a6016 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
+STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
static struct shrinker xfs_buf_shake = {
.shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
__func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(0, gfp_mask);
+ xfsbufd_wakeup(NULL, 0, gfp_mask);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
STATIC int
xfsbufd_wakeup(
+ struct shrinker *shrink,
int priority,
gfp_t mask)
{
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c916..80938c736c2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
goto out_cleanup_procfs;
vfs_initquota();
- xfs_inode_shrinker_init();
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
- xfs_inode_shrinker_destroy();
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bcc..a51a07c3a70 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -144,6 +144,41 @@ restart:
return last_error;
}
+/*
+ * Select the next per-ag structure to iterate during the walk. The reclaim
+ * walk is optimised only to walk AGs with reclaimable inodes in them.
+ */
+static struct xfs_perag *
+xfs_inode_ag_iter_next_pag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t *first,
+ int tag)
+{
+ struct xfs_perag *pag = NULL;
+
+ if (tag == XFS_ICI_RECLAIM_TAG) {
+ int found;
+ int ref;
+
+ spin_lock(&mp->m_perag_lock);
+ found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+ (void **)&pag, *first, 1, tag);
+ if (found <= 0) {
+ spin_unlock(&mp->m_perag_lock);
+ return NULL;
+ }
+ *first = pag->pag_agno + 1;
+ /* open coded pag reference increment */
+ ref = atomic_inc_return(&pag->pag_ref);
+ spin_unlock(&mp->m_perag_lock);
+ trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
+ } else {
+ pag = xfs_perag_get(mp, *first);
+ (*first)++;
+ }
+ return pag;
+}
+
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
@@ -154,16 +189,15 @@ xfs_inode_ag_iterator(
int exclusive,
int *nr_to_scan)
{
+ struct xfs_perag *pag;
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
int nr;
nr = nr_to_scan ? *nr_to_scan : INT_MAX;
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- struct xfs_perag *pag;
-
- pag = xfs_perag_get(mp, ag);
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
exclusive, &nr);
xfs_perag_put(pag);
@@ -640,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
+
+ if (!pag->pag_ici_reclaimable) {
+ /* propagate the reclaim tag up into the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
pag->pag_ici_reclaimable++;
}
@@ -674,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
+ if (!pag->pag_ici_reclaimable) {
+ /* clear the reclaim tag from the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_RECLAIM_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
}
/*
@@ -828,83 +883,52 @@ xfs_reclaim_inodes(
/*
* Shrinker infrastructure.
- *
- * This is all far more complex than it needs to be. It adds a global list of
- * mounts because the shrinkers can only call a global context. We need to make
- * the shrinkers pass a context to avoid the need for global state.
*/
-static LIST_HEAD(xfs_mount_list);
-static struct rw_semaphore xfs_mount_list_lock;
-
static int
xfs_reclaim_inode_shrink(
+ struct shrinker *shrink,
int nr_to_scan,
gfp_t gfp_mask)
{
struct xfs_mount *mp;
struct xfs_perag *pag;
xfs_agnumber_t ag;
- int reclaimable = 0;
+ int reclaimable;
+ mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
if (nr_to_scan) {
if (!(gfp_mask & __GFP_FS))
return -1;
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+ xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
- if (nr_to_scan <= 0)
- break;
- }
- up_read(&xfs_mount_list_lock);
- }
+ /* if we don't exhaust the scan, don't bother coming back */
+ if (nr_to_scan > 0)
+ return -1;
+ }
- down_read(&xfs_mount_list_lock);
- list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
- for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
- pag = xfs_perag_get(mp, ag);
- reclaimable += pag->pag_ici_reclaimable;
- xfs_perag_put(pag);
- }
+ reclaimable = 0;
+ ag = 0;
+ while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
+ XFS_ICI_RECLAIM_TAG))) {
+ reclaimable += pag->pag_ici_reclaimable;
+ xfs_perag_put(pag);
}
- up_read(&xfs_mount_list_lock);
return reclaimable;
}
-static struct shrinker xfs_inode_shrinker = {
- .shrink = xfs_reclaim_inode_shrink,
- .seeks = DEFAULT_SEEKS,
-};
-
-void __init
-xfs_inode_shrinker_init(void)
-{
- init_rwsem(&xfs_mount_list_lock);
- register_shrinker(&xfs_inode_shrinker);
-}
-
-void
-xfs_inode_shrinker_destroy(void)
-{
- ASSERT(list_empty(&xfs_mount_list));
- unregister_shrinker(&xfs_inode_shrinker);
-}
-
void
xfs_inode_shrinker_register(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_add_tail(&mp->m_mplist, &xfs_mount_list);
- up_write(&xfs_mount_list_lock);
+ mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
+ mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&mp->m_inode_shrink);
}
void
xfs_inode_shrinker_unregister(
struct xfs_mount *mp)
{
- down_write(&xfs_mount_list_lock);
- list_del(&mp->m_mplist);
- up_write(&xfs_mount_list_lock);
+ unregister_shrinker(&mp->m_inode_shrink);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca988..e28139aaa4a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags, int tag, int write_lock, int *nr_to_scan);
-void xfs_inode_shrinker_init(void);
-void xfs_inode_shrinker_destroy(void);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 73d5aa11738..30282069090 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \
unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 8c117ff2e3a..67c018392d6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int xfs_qm_shake(int, gfp_t);
+STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
static struct shrinker xfs_qm_shaker = {
.shrink = xfs_qm_shake,
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
*/
/* ARGSUSED */
STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
+xfs_qm_shake(
+ struct shrinker *shrink,
+ int nr_to_scan,
+ gfp_t gfp_mask)
{
int ndqused, nfree, n;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d2c7eed4ed..5761087ee8e 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
wait_queue_head_t m_wait_single_sync_task;
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
- struct list_head m_mplist; /* inode shrinker mount list */
+ struct shrinker m_inode_shrink; /* inode reclaim shrinker */
} xfs_mount_t;
/*