From c9c129714e71c890bed1bd5b61697a896c3c2d54 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:49:59 +0000 Subject: xfs: Don't wake xfsbufd when idle The xfsbufd wakes every xfsbufd_centisecs (once per second by default) for each filesystem even when the filesystem is idle. If the xfsbufd has nothing to do, put it into a long term sleep and only wake it up when there is work pending (i.e. dirty buffers to flush soon). This will make laptop power misers happy. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 77b8be81c76..18ae3ba8f78 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1595,6 +1595,11 @@ xfs_buf_delwri_queue( list_del(&bp->b_list); } + if (list_empty(dwq)) { + /* start xfsbufd as it is about to have something to do */ + wake_up_process(bp->b_target->bt_task); + } + bp->b_flags |= _XBF_DELWRI_Q; list_add_tail(&bp->b_list, dwq); bp->b_queuetime = jiffies; @@ -1644,6 +1649,8 @@ xfsbufd_wakeup( list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) continue; + if (list_empty(&btp->bt_delwrite_queue)) + continue; set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); wake_up_process(btp->bt_task); } @@ -1708,6 +1715,9 @@ xfsbufd( set_freezable(); do { + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); + long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); refrigerator(); @@ -1715,12 +1725,12 @@ xfsbufd( clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); } - schedule_timeout_interruptible( - xfs_buf_timer_centisecs * msecs_to_jiffies(10)); - - xfs_buf_delwri_split(target, &tmp, - xfs_buf_age_centisecs * msecs_to_jiffies(10)); + /* sleep for a long time if there is nothing to do. */ + if (list_empty(&target->bt_delwrite_queue)) + tout = MAX_SCHEDULE_TIMEOUT; + schedule_timeout_interruptible(tout); + xfs_buf_delwri_split(target, &tmp, age); count = 0; while (!list_empty(&tmp)) { bp = list_entry(tmp.next, xfs_buf_t, b_list); -- cgit v1.2.3-70-g09d2 From 64e0bc7d2a6609ad265757a600e2a0d93c8adb47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jan 2010 22:17:58 +0000 Subject: xfs: clean up xfs_bwrite Fold XFS_bwrite into it's only caller, xfs_bwrite and move it into xfs_buf.c instead of leaving it as a fairly large inline function. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 27 +++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_buf.h | 19 +------------------ fs/xfs/xfs_rw.c | 31 ------------------------------- fs/xfs/xfs_rw.h | 1 - 4 files changed, 28 insertions(+), 50 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 18ae3ba8f78..492465c6e0b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1050,6 +1050,33 @@ xfs_buf_ioerror( trace_xfs_buf_ioerror(bp, error, _RET_IP_); } +int +xfs_bwrite( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + int iowait = (bp->b_flags & XBF_ASYNC) == 0; + int error = 0; + + bp->b_strat = xfs_bdstrat_cb; + bp->b_mount = mp; + bp->b_flags |= XBF_WRITE; + if (!iowait) + bp->b_flags |= _XBF_RUN_QUEUES; + + xfs_buf_delwri_dequeue(bp); + xfs_buf_iostrategy(bp); + + if (iowait) { + error = xfs_buf_iowait(bp); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + xfs_buf_relse(bp); + } + + return error; +} + int xfs_bawrite( void *mp, diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index c20a7600186..f69b8e714a1 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -232,6 +232,7 @@ extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ +extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bawrite(void *mp, xfs_buf_t *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); extern void xfs_buf_ioend(xfs_buf_t *, int); @@ -390,24 +391,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) #define xfs_biozero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -static inline int XFS_bwrite(xfs_buf_t *bp) -{ - int iowait = (bp->b_flags & XBF_ASYNC) == 0; - int error = 0; - - if (!iowait) - bp->b_flags |= _XBF_RUN_QUEUES; - - xfs_buf_delwri_dequeue(bp); - xfs_buf_iostrategy(bp); - if (iowait) { - error = xfs_buf_iowait(bp); - xfs_buf_relse(bp); - } - return error; -} - #define xfs_iowait(bp) xfs_buf_iowait(bp) #define xfs_baread(target, rablkno, ralen) \ diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 5aa07caea5f..9d933a10d6b 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -305,37 +305,6 @@ xfs_read_buf( return (error); } -/* - * Wrapper around bwrite() so that we can trap - * write errors, and act accordingly. - */ -int -xfs_bwrite( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - int error; - - /* - * XXXsup how does this work for quotas. - */ - XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); - bp->b_mount = mp; - XFS_BUF_WRITE(bp); - - if ((error = XFS_bwrite(bp))) { - ASSERT(mp); - /* - * Cannot put a buftrace here since if the buffer is not - * B_HOLD then we will brelse() the buffer before returning - * from bwrite and we could be tracing a buffer that has - * been reused. - */ - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - } - return (error); -} - /* * helper function to extract extent size hint from inode */ diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index 571f2174435..ff68eb5e738 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -40,7 +40,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) * Prototypes for functions in xfs_rw.c. */ extern int xfs_write_clear_setuid(struct xfs_inode *ip); -extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bioerror(struct xfs_buf *bp); extern int xfs_bioerror_relse(struct xfs_buf *bp); extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, -- cgit v1.2.3-70-g09d2 From 4e23471a3f3aba885ea70100db47ccacb5f069f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jan 2010 22:17:56 +0000 Subject: xfs: move more buffer helpers into xfs_buf.c Move xfsbdstrat and xfs_bdstrat_cb from xfs_lrw.c and xfs_bioerror and xfs_bioerror_relse from xfs_rw.c into xfs_buf.c. This also means xfs_bioerror and xfs_bioerror_relse can be marked static now. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 120 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_buf.h | 4 ++ fs/xfs/linux-2.6/xfs_lrw.c | 47 ------------------ fs/xfs/linux-2.6/xfs_lrw.h | 3 -- fs/xfs/xfs_rw.c | 82 ------------------------------- fs/xfs/xfs_rw.h | 2 - 6 files changed, 124 insertions(+), 134 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 492465c6e0b..158fad4550d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1112,6 +1112,126 @@ xfs_bdwrite( xfs_buf_delwri_queue(bp, 1); } +/* + * Called when we want to stop a buffer from getting written or read. + * We attach the EIO error, muck with its flags, and call biodone + * so that the proper iodone callbacks get called. + */ +STATIC int +xfs_bioerror( + xfs_buf_t *bp) +{ +#ifdef XFSERRORDEBUG + ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); +#endif + + /* + * No need to wait until the buffer is unpinned, we aren't flushing it. + */ + XFS_BUF_ERROR(bp, EIO); + + /* + * We're calling biodone, so delete XBF_DONE flag. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_UNDONE(bp); + XFS_BUF_STALE(bp); + + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + xfs_biodone(bp); + + return EIO; +} + +/* + * Same as xfs_bioerror, except that we are releasing the buffer + * here ourselves, and avoiding the biodone call. + * This is meant for userdata errors; metadata bufs come with + * iodone functions attached, so that we can track down errors. + */ +STATIC int +xfs_bioerror_relse( + struct xfs_buf *bp) +{ + int64_t fl = XFS_BUF_BFLAGS(bp); + /* + * No need to wait until the buffer is unpinned. + * We aren't flushing it. + * + * chunkhold expects B_DONE to be set, whether + * we actually finish the I/O or not. We don't want to + * change that interface. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_DONE(bp); + XFS_BUF_STALE(bp); + XFS_BUF_CLR_IODONE_FUNC(bp); + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + if (!(fl & XFS_B_ASYNC)) { + /* + * Mark b_error and B_ERROR _both_. + * Lot's of chunkcache code assumes that. + * There's no reason to mark error for + * ASYNC buffers. + */ + XFS_BUF_ERROR(bp, EIO); + XFS_BUF_FINISH_IOWAIT(bp); + } else { + xfs_buf_relse(bp); + } + + return EIO; +} + + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb( + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) + return xfs_bioerror_relse(bp); + else + return xfs_bioerror(bp); + } + + xfs_buf_iorequest(bp); + return 0; +} + +/* + * Wrapper around bdstrat so that we can stop data from going to disk in case + * we are shutting down the filesystem. Typically user data goes thru this + * path; one of the exceptions is the superblock. + */ +void +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + return; + } + + xfs_buf_iorequest(bp); +} + STATIC void _xfs_buf_ioend( xfs_buf_t *bp, diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index f69b8e714a1..9a29d18656e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -235,6 +235,10 @@ extern void xfs_buf_unlock(xfs_buf_t *); extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bawrite(void *mp, xfs_buf_t *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); + +extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern int xfs_buf_iorequest(xfs_buf_t *); diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 0d32457abef..c80fa00d2ad 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -783,53 +783,6 @@ write_retry: return -error; } -/* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int -xfs_bdstrat_cb(struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - /* - * Metadata write that didn't get logged but - * written delayed anyway. These aren't associated - * with a transaction, and can be ignored. - */ - if (XFS_BUF_IODONE_FUNC(bp) == NULL && - (XFS_BUF_ISREAD(bp)) == 0) - return (xfs_bioerror_relse(bp)); - else - return (xfs_bioerror(bp)); - } - - xfs_buf_iorequest(bp); - return 0; -} - -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - ASSERT(mp); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return; - } - - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); -} - /* * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index d1f7789c7ff..342ae8c0d01 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -22,9 +22,6 @@ struct xfs_mount; struct xfs_inode; struct xfs_buf; -/* errors from xfsbdstrat() must be extracted from the buffer */ -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 9d933a10d6b..abb2c458b14 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -153,88 +153,6 @@ xfs_do_force_shutdown( } } - -/* - * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call biodone - * so that the proper iodone callbacks get called. - */ -int -xfs_bioerror( - xfs_buf_t *bp) -{ - -#ifdef XFSERRORDEBUG - ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); -#endif - - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - */ - XFS_BUF_ERROR(bp, EIO); - /* - * We're calling biodone, so delete B_DONE flag. Either way - * we have to call the iodone callback, and calling biodone - * probably is the best way since it takes care of - * GRIO as well. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_UNDONE(bp); - XFS_BUF_STALE(bp); - - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - xfs_biodone(bp); - - return (EIO); -} - -/* - * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the biodone call. - * This is meant for userdata errors; metadata bufs come with - * iodone functions attached, so that we can track down errors. - */ -int -xfs_bioerror_relse( - xfs_buf_t *bp) -{ - int64_t fl; - - ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks); - ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone); - - fl = XFS_BUF_BFLAGS(bp); - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - * - * chunkhold expects B_DONE to be set, whether - * we actually finish the I/O or not. We don't want to - * change that interface. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_DONE(bp); - XFS_BUF_STALE(bp); - XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - if (!(fl & XFS_B_ASYNC)) { - /* - * Mark b_error and B_ERROR _both_. - * Lot's of chunkcache code assumes that. - * There's no reason to mark error for - * ASYNC buffers. - */ - XFS_BUF_ERROR(bp, EIO); - XFS_BUF_FINISH_IOWAIT(bp); - } else { - xfs_buf_relse(bp); - } - return (EIO); -} - /* * Prints out an ALERT message about I/O error. */ diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index ff68eb5e738..a54c3b7cd37 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -40,8 +40,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) * Prototypes for functions in xfs_rw.c. */ extern int xfs_write_clear_setuid(struct xfs_inode *ip); -extern int xfs_bioerror(struct xfs_buf *bp); -extern int xfs_bioerror_relse(struct xfs_buf *bp); extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, xfs_daddr_t blkno, int len, uint flags, struct xfs_buf **bpp); -- cgit v1.2.3-70-g09d2 From b9c48649577dfc4a8c263c106d518effa24ea54b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:47:39 +1100 Subject: xfs: xfs_buf_iomove() doesn't care about signedness xfs_buf_iomove() uses xfs_caddr_t as it's parameter types, but it doesn't care about the signedness of the variables as it is just copying the data. Change the prototype to use void * so that we don't get sign warnings at call sites. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 2 +- fs/xfs/linux-2.6/xfs_buf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 158fad4550d..efd745bb888 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1443,7 +1443,7 @@ xfs_buf_iomove( xfs_buf_t *bp, /* buffer to process */ size_t boff, /* starting buffer offset */ size_t bsize, /* length to copy */ - caddr_t data, /* data address */ + void *data, /* data address */ xfs_buf_rw_t mode) /* read/write/zero flag */ { size_t bend, cpoff, csize; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 9a29d18656e..4f2ad66edb7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -243,7 +243,7 @@ extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, +extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); static inline int xfs_buf_iostrategy(xfs_buf_t *bp) -- cgit v1.2.3-70-g09d2 From 0cadda1c5f194f98a05d252ff4385d86d2ed0862 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:44 +0000 Subject: xfs: remove duplicate buffer flags Currently we define aliases for the buffer flags in various namespaces, which only adds confusion. Remove all but the XBF_ flags to clean this up a bit. Note that we still abuse XFS_B_ASYNC/XBF_ASYNC for some non-buffer uses, but I'll clean that up later. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 2 +- fs/xfs/linux-2.6/xfs_buf.h | 22 ++++------------------ fs/xfs/linux-2.6/xfs_fs_subr.c | 2 +- fs/xfs/linux-2.6/xfs_sync.c | 4 ++-- fs/xfs/quota/xfs_dquot.c | 3 +-- fs/xfs/quota/xfs_dquot_item.c | 3 +-- fs/xfs/xfs_alloc.c | 2 +- fs/xfs/xfs_attr.c | 12 +++++------- fs/xfs/xfs_attr_leaf.c | 2 +- fs/xfs/xfs_btree.c | 4 ++-- fs/xfs/xfs_ialloc.c | 2 +- fs/xfs/xfs_inode.c | 20 ++++++++++---------- fs/xfs/xfs_inode_item.c | 2 +- fs/xfs/xfs_log_recover.c | 8 ++++---- fs/xfs/xfs_mount.c | 4 ++-- fs/xfs/xfs_trans_buf.c | 27 ++++++++++++++------------- fs/xfs/xfs_vnodeops.c | 4 ++-- 17 files changed, 53 insertions(+), 70 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index efd745bb888..730eff1e71a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1169,7 +1169,7 @@ xfs_bioerror_relse( XFS_BUF_STALE(bp); XFS_BUF_CLR_IODONE_FUNC(bp); XFS_BUF_CLR_BDSTRAT_FUNC(bp); - if (!(fl & XFS_B_ASYNC)) { + if (!(fl & XBF_ASYNC)) { /* * Mark b_error and B_ERROR _both_. * Lot's of chunkcache code assumes that. diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 4f2ad66edb7..ea8c198f0c3 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -275,33 +275,19 @@ extern void xfs_buf_terminate(void); ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) -#define XFS_B_ASYNC XBF_ASYNC -#define XFS_B_DELWRI XBF_DELWRI -#define XFS_B_READ XBF_READ -#define XFS_B_WRITE XBF_WRITE -#define XFS_B_STALE XBF_STALE - -#define XFS_BUF_TRYLOCK XBF_TRYLOCK -#define XFS_INCORE_TRYLOCK XBF_TRYLOCK -#define XFS_BUF_LOCK XBF_LOCK -#define XFS_BUF_MAPPED XBF_MAPPED - -#define BUF_BUSY XBF_DONT_BLOCK - #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) -#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) -#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) -#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) +#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) +#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) +#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) #define XFS_BUF_SUPER_STALE(bp) do { \ XFS_BUF_STALE(bp); \ xfs_buf_delwri_dequeue(bp); \ XFS_BUF_DONE(bp); \ } while (0) -#define XFS_BUF_MANAGE XBF_FS_MANAGED #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) @@ -390,7 +376,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) #define xfs_biomove(bp, off, len, data, rw) \ xfs_buf_iomove((bp), (off), (len), (data), \ - ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) + ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ) #define xfs_biozero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 7501b85fd86..b6918d76bc7 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -79,7 +79,7 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = -filemap_fdatawrite(mapping); } - if (flags & XFS_B_ASYNC) + if (flags & XBF_ASYNC) return ret; ret2 = xfs_wait_on_pages(ip, first, last); if (!ret) diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b58f8412dfe..58c24be72c6 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -234,7 +234,7 @@ xfs_sync_inode_data( } error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? - 0 : XFS_B_ASYNC, FI_NONE); + 0 : XBF_ASYNC, FI_NONE); xfs_iunlock(ip, XFS_IOLOCK_SHARED); out_wait: @@ -370,7 +370,7 @@ xfs_sync_fsdata( if (flags & SYNC_TRYLOCK) { ASSERT(!(flags & SYNC_WAIT)); - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + bp = xfs_getsb(mp, XBF_TRYLOCK); if (!bp) goto out; diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index a447493690e..5756392ffde 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1527,8 +1527,7 @@ xfs_qm_dqflock_pushbuf_wait( * the flush lock when the I/O completes. */ bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, - XFS_QI_DQCHUNKLEN(dqp->q_mount), - XFS_INCORE_TRYLOCK); + XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { int error; diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index d0d4a9a0bbd..37929d19ef4 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -237,8 +237,7 @@ xfs_qm_dquot_logitem_pushbuf( } mp = dqp->q_mount; bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, - XFS_QI_DQCHUNKLEN(mp), - XFS_INCORE_TRYLOCK); + XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 8aa181d6dd7..a27aeb7d9e7 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2180,7 +2180,7 @@ xfs_alloc_read_agf( ASSERT(agno != NULLAGNUMBER); error = xfs_read_agf(mp, tp, agno, - (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, bpp); if (error) return error; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index f7b426a1b6e..b9c196a53c4 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -2015,15 +2015,14 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, - blkcnt, - XFS_BUF_LOCK | XBF_DONT_BLOCK, + blkcnt, XBF_LOCK | XBF_DONT_BLOCK, &bp); if (error) return(error); tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, dst, XFS_B_READ); + xfs_biomove(bp, 0, tmp, dst, XBF_READ); xfs_buf_relse(bp); dst += tmp; valuelen -= tmp; @@ -2149,13 +2148,13 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, - XFS_BUF_LOCK | XBF_DONT_BLOCK); + XBF_LOCK | XBF_DONT_BLOCK); ASSERT(bp); ASSERT(!XFS_BUF_GETERROR(bp)); tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE); + xfs_biomove(bp, 0, tmp, src, XBF_WRITE); if (tmp < XFS_BUF_SIZE(bp)) xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ @@ -2216,8 +2215,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) /* * If the "remote" value is in the cache, remove it. */ - bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, - XFS_INCORE_TRYLOCK); + bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); if (bp) { XFS_BUF_STALE(bp); XFS_BUF_UNDELAYWRITE(bp); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 52519a20184..a90ce74fc25 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -2950,7 +2950,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, map.br_blockcount); bp = xfs_trans_get_buf(*trans, dp->i_mount->m_ddev_targp, - dblkno, dblkcnt, XFS_BUF_LOCK); + dblkno, dblkcnt, XBF_LOCK); xfs_trans_binval(*trans, bp); /* * Roll to next transaction. diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 36a0992dd66..96be4b0f249 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -977,7 +977,7 @@ xfs_btree_get_buf_block( xfs_daddr_t d; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, @@ -1008,7 +1008,7 @@ xfs_btree_read_buf_block( int error; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 52c9d006c0e..9d884c127bb 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -205,7 +205,7 @@ xfs_ialloc_inode_init( d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); ASSERT(fbuf); ASSERT(!XFS_BUF_GETERROR(fbuf)); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0317b000ab4..bbb3bee8e93 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -151,7 +151,7 @@ xfs_imap_to_bp( "an error %d on %s. Returning error.", error, mp->m_fsname); } else { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); } return error; } @@ -239,7 +239,7 @@ xfs_inotobp( if (error) return error; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); if (error) return error; @@ -285,7 +285,7 @@ xfs_itobp( return error; if (!bp) { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); ASSERT(tp == NULL); *bpp = NULL; return EAGAIN; @@ -807,7 +807,7 @@ xfs_iread( * Get pointers to the on-disk inode and the buffer containing it. */ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, - XFS_BUF_LOCK, iget_flags); + XBF_LOCK, iget_flags); if (error) return error; dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -1751,7 +1751,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -1833,7 +1833,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -1895,7 +1895,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2040,7 +2040,7 @@ xfs_ifree_cluster( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); pre_flushed = 0; lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); @@ -2151,7 +2151,7 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -2952,7 +2952,7 @@ xfs_iflush( * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); + noblock ? XBF_TRYLOCK : XBF_LOCK); if (error || !bp) { xfs_ifunlock(ip); return error; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index f38855d21ea..6194fb5d377 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -785,7 +785,7 @@ xfs_inode_item_pushbuf( mp = ip->i_mount; bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, - iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); + iip->ili_format.ilf_len, XBF_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 65f1f137d78..97148f0c4bd 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2184,9 +2184,9 @@ xlog_recover_do_buffer_trans( } mp = log->l_mp; - buf_flags = XFS_BUF_LOCK; + buf_flags = XBF_LOCK; if (!(flags & XFS_BLI_INODE_BUF)) - buf_flags |= XFS_BUF_MAPPED; + buf_flags |= XBF_MAPPED; bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); if (XFS_BUF_ISERROR(bp)) { @@ -2288,7 +2288,7 @@ xlog_recover_do_inode_trans( } bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, - XFS_BUF_LOCK); + XBF_LOCK); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, bp, in_f->ilf_blkno); @@ -3146,7 +3146,7 @@ xlog_recover_process_one_iunlink( /* * Get the on disk inode to find the next inode in the bucket. */ - error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK); if (error) goto fail_iput; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d95bd1809f3..bb0154047e8 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -665,7 +665,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) * access to the superblock. */ sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; + extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED; bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); @@ -1969,7 +1969,7 @@ xfs_getsb( ASSERT(mp->m_sb_bp != NULL); bp = mp->m_sb_bp; - if (flags & XFS_BUF_TRYLOCK) { + if (flags & XBF_TRYLOCK) { if (!XFS_BUF_CPSEMA(bp)) { return NULL; } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 49130628d5e..5ffd544434e 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -75,13 +75,14 @@ xfs_trans_get_buf(xfs_trans_t *tp, xfs_buf_log_item_t *bip; if (flags == 0) - flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; + flags = XBF_LOCK | XBF_MAPPED; /* * Default to a normal get_buf() call if the tp is NULL. */ if (tp == NULL) - return xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY); + return xfs_buf_get(target_dev, blkno, len, + flags | XBF_DONT_BLOCK); /* * If we find the buffer in the cache with this transaction @@ -117,14 +118,14 @@ xfs_trans_get_buf(xfs_trans_t *tp, } /* - * We always specify the BUF_BUSY flag within a transaction so - * that get_buf does not try to push out a delayed write buffer + * We always specify the XBF_DONT_BLOCK flag within a transaction + * so that get_buf does not try to push out a delayed write buffer * which might cause another transaction to take place (if the * buffer was delayed alloc). Such recursive transactions can * easily deadlock with our current transaction as well as cause * us to run out of stack space. */ - bp = xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK); if (bp == NULL) { return NULL; } @@ -290,15 +291,15 @@ xfs_trans_read_buf( int error; if (flags == 0) - flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; + flags = XBF_LOCK | XBF_MAPPED; /* * Default to a normal get_buf() call if the tp is NULL. */ if (tp == NULL) { - bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); if (!bp) - return (flags & XFS_BUF_TRYLOCK) ? + return (flags & XBF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); if (XFS_BUF_GETERROR(bp) != 0) { @@ -385,14 +386,14 @@ xfs_trans_read_buf( } /* - * We always specify the BUF_BUSY flag within a transaction so - * that get_buf does not try to push out a delayed write buffer + * We always specify the XBF_DONT_BLOCK flag within a transaction + * so that get_buf does not try to push out a delayed write buffer * which might cause another transaction to take place (if the * buffer was delayed alloc). Such recursive transactions can * easily deadlock with our current transaction as well as cause * us to run out of stack space. */ - bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); if (bp == NULL) { *bpp = NULL; return 0; @@ -472,8 +473,8 @@ shutdown_abort: if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); #endif - ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != - (XFS_B_STALE|XFS_B_DELWRI)); + ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != + (XBF_STALE|XBF_DELWRI)); trace_xfs_trans_read_buf_shut(bp, _RET_IP_); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9f7c001ef46..4da96cdffb7 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -256,7 +256,7 @@ xfs_setattr( iattr->ia_size > ip->i_d.di_size) { code = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, - XFS_B_ASYNC, FI_NONE); + XBF_ASYNC, FI_NONE); } /* wait for all I/O to complete */ @@ -1096,7 +1096,7 @@ xfs_release( */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) - xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); + xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); } if (ip->i_d.di_nlink != 0) { -- cgit v1.2.3-70-g09d2 From bdfb04301fa5fdd95f219539a9a5b9663b1e5fc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2010 21:55:30 +0000 Subject: xfs: replace KM_LARGE with explicit vmalloc use We use the KM_LARGE flag to make kmem_alloc and friends use vmalloc if necessary. As we only need this for a few boot/mount time allocations just switch to explicit vmalloc calls there. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/kmem.c | 56 +++++++++++++++++----------------------------- fs/xfs/linux-2.6/kmem.h | 21 ++++++++++++++--- fs/xfs/linux-2.6/xfs_buf.c | 6 ++--- fs/xfs/quota/xfs_qm.c | 26 ++++++++++++++++----- fs/xfs/xfs_itable.c | 8 ++++--- 5 files changed, 66 insertions(+), 51 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 2d3f90afe5f..bc7405585de 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include -#include #include #include #include @@ -24,8 +23,25 @@ #include "time.h" #include "kmem.h" -#define MAX_VMALLOCS 6 -#define MAX_SLAB_SIZE 0x20000 +/* + * Greedy allocation. May fail and may return vmalloced memory. + * + * Must be freed using kmem_free_large. + */ +void * +kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) +{ + void *ptr; + size_t kmsize = maxsize; + + while (!(ptr = kmem_zalloc_large(kmsize))) { + if ((kmsize >>= 1) <= minsize) + kmsize = minsize; + } + if (ptr) + *size = kmsize; + return ptr; +} void * kmem_alloc(size_t size, unsigned int __nocast flags) @@ -34,19 +50,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; -#ifdef DEBUG - if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { - printk(KERN_WARNING "Large %s attempt, size=%ld\n", - __func__, (long)size); - dump_stack(); - } -#endif - do { - if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) - ptr = kmalloc(size, lflags); - else - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = kmalloc(size, lflags); if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) @@ -68,27 +73,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags) return ptr; } -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, - unsigned int __nocast flags) -{ - void *ptr; - size_t kmsize = maxsize; - unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP; - - while (!(ptr = kmem_zalloc(kmsize, kmflags))) { - if ((kmsize <= minsize) && (flags & KM_NOSLEEP)) - break; - if ((kmsize >>= 1) <= minsize) { - kmsize = minsize; - kmflags = flags; - } - } - if (ptr) - *size = kmsize; - return ptr; -} - void kmem_free(const void *ptr) { diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 179cbd630f6..f7c8f7a9ea6 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -21,6 +21,7 @@ #include #include #include +#include /* * General memory allocation interfaces @@ -30,7 +31,6 @@ #define KM_NOSLEEP 0x0002u #define KM_NOFS 0x0004u #define KM_MAYFAIL 0x0008u -#define KM_LARGE 0x0010u /* * We use a special process flag to avoid recursive callbacks into @@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags) { gfp_t lflags; - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE)); + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); if (flags & KM_NOSLEEP) { lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags) extern void *kmem_alloc(size_t, unsigned int __nocast); extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); extern void kmem_free(const void *); +static inline void *kmem_zalloc_large(size_t size) +{ + void *ptr; + + ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} +static inline void kmem_free_large(void *ptr) +{ + vfree(ptr); +} + +extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); + /* * Zone interfaces */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 730eff1e71a..44e20e578ba 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1525,8 +1525,8 @@ xfs_alloc_bufhash( btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; - btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * - sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); + btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * + sizeof(xfs_bufhash_t)); for (i = 0; i < (1 << btp->bt_hashshift); i++) { spin_lock_init(&btp->bt_hash[i].bh_lock); INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); @@ -1537,7 +1537,7 @@ STATIC void xfs_free_bufhash( xfs_buftarg_t *btp) { - kmem_free(btp->bt_hash); + kmem_free_large(btp->bt_hash); btp->bt_hash = NULL; } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 9e627a8b5b0..11cfd8245c7 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -118,9 +118,14 @@ xfs_Gqm_init(void) */ udqhash = kmem_zalloc_greedy(&hsize, XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), - XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), - KM_SLEEP | KM_MAYFAIL | KM_LARGE); - gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); + if (!udqhash) + goto out; + + gdqhash = kmem_zalloc_large(hsize); + if (!udqhash) + goto out_free_udqhash; + hsize /= sizeof(xfs_dqhash_t); ndquot = hsize << 8; @@ -170,6 +175,11 @@ xfs_Gqm_init(void) mutex_init(&qcheck_lock); #endif return xqm; + + out_free_udqhash: + kmem_free_large(udqhash); + out: + return NULL; } /* @@ -189,8 +199,8 @@ xfs_qm_destroy( xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); } - kmem_free(xqm->qm_usr_dqhtable); - kmem_free(xqm->qm_grp_dqhtable); + kmem_free_large(xqm->qm_usr_dqhtable); + kmem_free_large(xqm->qm_grp_dqhtable); xqm->qm_usr_dqhtable = NULL; xqm->qm_grp_dqhtable = NULL; xqm->qm_dqhashmask = 0; @@ -219,8 +229,12 @@ xfs_qm_hold_quotafs_ref( */ mutex_lock(&xfs_Gqm_lock); - if (xfs_Gqm == NULL) + if (!xfs_Gqm) { xfs_Gqm = xfs_Gqm_init(); + if (!xfs_Gqm) + return ENOMEM; + } + /* * We can keep a list of all filesystems with quotas mounted for * debugging and statistical purposes, but ... diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 940307a6a60..3af02314c60 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -408,8 +408,10 @@ xfs_bulkstat( (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); nimask = ~(nicluster - 1); nbcluster = nicluster >> mp->m_sb.sb_inopblog; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, - KM_SLEEP | KM_MAYFAIL | KM_LARGE); + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + if (!irbuf) + return ENOMEM; + nirbuf = irbsize / sizeof(*irbuf); /* @@ -727,7 +729,7 @@ xfs_bulkstat( /* * Done, we're either out of filesystem or space to put the data. */ - kmem_free(irbuf); + kmem_free_large(irbuf); *ubcountp = ubelem; /* * Found some inodes, return them now and return the error next time. -- cgit v1.2.3-70-g09d2 From d808f617ad00a413585b806de340feda5ad9a2da Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 2 Feb 2010 10:13:42 +1100 Subject: xfs: Don't issue buffer IO direct from AIL push V2 All buffers logged into the AIL are marked as delayed write. When the AIL needs to push the buffer out, it issues an async write of the buffer. This means that IO patterns are dependent on the order of buffers in the AIL. Instead of flushing the buffer, promote the buffer in the delayed write list so that the next time the xfsbufd is run the buffer will be flushed by the xfsbufd. Return the state to the xfsaild that the buffer was promoted so that the xfsaild knows that it needs to cause the xfsbufd to run to flush the buffers that were promoted. Using the xfsbufd for issuing the IO allows us to dispatch all buffer IO from the one queue. This means that we can make much more enlightened decisions on what order to flush buffers to disk as we don't have multiple places issuing IO. Optimisations to xfsbufd will be in a future patch. Version 2 - kill XFS_ITEM_FLUSHING as it is now unused. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 29 +++++++++++++ fs/xfs/linux-2.6/xfs_buf.h | 2 + fs/xfs/linux-2.6/xfs_trace.h | 1 + fs/xfs/quota/xfs_dquot_item.c | 85 ++++++------------------------------- fs/xfs/quota/xfs_dquot_item.h | 4 -- fs/xfs/xfs_buf_item.c | 64 +++++++++++++++------------- fs/xfs/xfs_inode_item.c | 98 +++++++------------------------------------ fs/xfs/xfs_inode_item.h | 6 --- fs/xfs/xfs_trans.h | 3 +- fs/xfs/xfs_trans_ail.c | 13 +++--- 10 files changed, 102 insertions(+), 203 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 44e20e578ba..b306265caa3 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1778,6 +1778,35 @@ xfs_buf_delwri_dequeue( trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); } +/* + * If a delwri buffer needs to be pushed before it has aged out, then promote + * it to the head of the delwri queue so that it will be flushed on the next + * xfsbufd run. We do this by resetting the queuetime of the buffer to be older + * than the age currently needed to flush the buffer. Hence the next time the + * xfsbufd sees it is guaranteed to be considered old enough to flush. + */ +void +xfs_buf_delwri_promote( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; + + ASSERT(bp->b_flags & XBF_DELWRI); + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + /* + * Check the buffer age before locking the delayed write queue as we + * don't need to promote buffers that are already past the flush age. + */ + if (bp->b_queuetime < jiffies - age) + return; + bp->b_queuetime = jiffies - age; + spin_lock(&btp->bt_delwrite_lock); + list_move(&bp->b_list, &btp->bt_delwrite_queue); + spin_unlock(&btp->bt_delwrite_lock); +} + STATIC void xfs_buf_runall_queues( struct workqueue_struct *queue) diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index ea8c198f0c3..be45e8c5768 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -266,6 +266,7 @@ extern int xfs_buf_ispin(xfs_buf_t *); /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_dequeue(xfs_buf_t *); +extern void xfs_buf_delwri_promote(xfs_buf_t *); /* Buffer Daemon Setup Routines */ extern int xfs_buf_init(void); @@ -395,6 +396,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + #ifdef CONFIG_KDB_MODULES extern struct list_head *xfs_get_buftarg_list(void); #endif diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 1bb09e70b2e..a4574dcf506 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -483,6 +483,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 1b564376d50..dda0fb045c8 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -212,18 +212,10 @@ xfs_qm_dquot_logitem_pushbuf( xfs_dquot_t *dqp; xfs_mount_t *mp; xfs_buf_t *bp; - uint dopush; dqp = qip->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - /* - * The qli_pushbuf_flag keeps others from - * trying to duplicate our effort. - */ - ASSERT(qip->qli_pushbuf_flag != 0); - ASSERT(qip->qli_push_owner == current_pid()); - /* * If flushlock isn't locked anymore, chances are that the * inode flush completed and the inode was taken off the AIL. @@ -231,47 +223,20 @@ xfs_qm_dquot_logitem_pushbuf( */ if (completion_done(&dqp->q_flush) || ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { - qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); return; } mp = dqp->q_mount; bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - !completion_done(&dqp->q_flush)); - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); - - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0); - - if (dopush) { - int error; -#ifdef XFSRACEDEBUG - delay_for_intr(); - delay(300); -#endif - error = xfs_bawrite(mp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", - error, qip, bp); - } else { - xfs_buf_relse(bp); - } - } else { - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); - xfs_buf_relse(bp); - } + xfs_dqunlock(dqp); + if (!bp) return; - } + if (XFS_BUF_ISDELAYWRITE(bp)) + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); + return; - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); } /* @@ -289,50 +254,24 @@ xfs_qm_dquot_logitem_trylock( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; - uint retval; dqp = qip->qli_dquot; if (atomic_read(&dqp->q_pincount) > 0) - return (XFS_ITEM_PINNED); + return XFS_ITEM_PINNED; if (! xfs_qm_dqlock_nowait(dqp)) - return (XFS_ITEM_LOCKED); + return XFS_ITEM_LOCKED; - retval = XFS_ITEM_SUCCESS; if (!xfs_dqflock_nowait(dqp)) { /* - * The dquot is already being flushed. It may have been - * flushed delayed write, however, and we don't want to - * get stuck waiting for that to complete. So, we want to check - * to see if we can lock the dquot's buffer without sleeping. - * If we can and it is marked for delayed write, then we - * hold it and send it out from the push routine. We don't - * want to do that now since we might sleep in the device - * strategy routine. We also don't want to grab the buffer lock - * here because we'd like not to call into the buffer cache - * while holding the AIL lock. - * Make sure to only return PUSHBUF if we set pushbuf_flag - * ourselves. If someone else is doing it then we don't - * want to go to the push routine and duplicate their efforts. + * dquot has already been flushed to the backing buffer, + * leave it locked, pushbuf routine will unlock it. */ - if (qip->qli_pushbuf_flag == 0) { - qip->qli_pushbuf_flag = 1; - ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno); -#ifdef DEBUG - qip->qli_push_owner = current_pid(); -#endif - /* - * The dquot is left locked. - */ - retval = XFS_ITEM_PUSHBUF; - } else { - retval = XFS_ITEM_FLUSHING; - xfs_dqunlock_nonotify(dqp); - } + return XFS_ITEM_PUSHBUF; } ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); - return (retval); + return XFS_ITEM_SUCCESS; } diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h index 5a632531f84..5acae2ada70 100644 --- a/fs/xfs/quota/xfs_dquot_item.h +++ b/fs/xfs/quota/xfs_dquot_item.h @@ -27,10 +27,6 @@ typedef struct xfs_dq_logitem { xfs_log_item_t qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ - unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */ -#ifdef DEBUG - uint64_t qli_push_owner; -#endif xfs_dq_logformat_t qli_format; /* logged structure */ } xfs_dq_logitem_t; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index e0a11583ce5..f3c49e69eab 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -467,8 +467,10 @@ xfs_buf_item_unpin_remove( /* * This is called to attempt to lock the buffer associated with this * buf log item. Don't sleep on the buffer lock. If we can't get - * the lock right away, return 0. If we can get the lock, pull the - * buffer from the free list, mark it busy, and return 1. + * the lock right away, return 0. If we can get the lock, take a + * reference to the buffer. If this is a delayed write buffer that + * needs AIL help to be written back, invoke the pushbuf routine + * rather than the normal success path. */ STATIC uint xfs_buf_item_trylock( @@ -477,24 +479,18 @@ xfs_buf_item_trylock( xfs_buf_t *bp; bp = bip->bli_buf; - - if (XFS_BUF_ISPINNED(bp)) { + if (XFS_BUF_ISPINNED(bp)) return XFS_ITEM_PINNED; - } - - if (!XFS_BUF_CPSEMA(bp)) { + if (!XFS_BUF_CPSEMA(bp)) return XFS_ITEM_LOCKED; - } - /* - * Remove the buffer from the free list. Only do this - * if it's on the free list. Private buffers like the - * superblock buffer are not. - */ + /* take a reference to the buffer. */ XFS_BUF_HOLD(bp); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); trace_xfs_buf_item_trylock(bip); + if (XFS_BUF_ISDELAYWRITE(bp)) + return XFS_ITEM_PUSHBUF; return XFS_ITEM_SUCCESS; } @@ -626,11 +622,9 @@ xfs_buf_item_committed( } /* - * This is called to asynchronously write the buffer associated with this - * buf log item out to disk. The buffer will already have been locked by - * a successful call to xfs_buf_item_trylock(). If the buffer still has - * B_DELWRI set, then get it going out to disk with a call to bawrite(). - * If not, then just release the buffer. + * The buffer is locked, but is not a delayed write buffer. This happens + * if we race with IO completion and hence we don't want to try to write it + * again. Just release the buffer. */ STATIC void xfs_buf_item_push( @@ -642,17 +636,29 @@ xfs_buf_item_push( trace_xfs_buf_item_push(bip); bp = bip->bli_buf; + ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); + xfs_buf_relse(bp); +} - if (XFS_BUF_ISDELAYWRITE(bp)) { - int error; - error = xfs_bawrite(bip->bli_item.li_mountp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, - "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", - error, bip, bp); - } else { - xfs_buf_relse(bp); - } +/* + * The buffer is locked and is a delayed write buffer. Promote the buffer + * in the delayed write queue as the caller knows that they must invoke + * the xfsbufd to get this buffer written. We have to unlock the buffer + * to allow the xfsbufd to write it, too. + */ +STATIC void +xfs_buf_item_pushbuf( + xfs_buf_log_item_t *bip) +{ + xfs_buf_t *bp; + + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + trace_xfs_buf_item_pushbuf(bip); + + bp = bip->bli_buf; + ASSERT(XFS_BUF_ISDELAYWRITE(bp)); + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); } /* ARGSUSED */ @@ -677,7 +683,7 @@ static struct xfs_item_ops xfs_buf_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, - .iop_pushbuf = NULL, + .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committing }; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 207553e8295..d4dc063111f 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -602,33 +602,20 @@ xfs_inode_item_trylock( if (!xfs_iflock_nowait(ip)) { /* - * If someone else isn't already trying to push the inode - * buffer, we get to do it. + * inode has already been flushed to the backing buffer, + * leave it locked in shared mode, pushbuf routine will + * unlock it. */ - if (iip->ili_pushbuf_flag == 0) { - iip->ili_pushbuf_flag = 1; -#ifdef DEBUG - iip->ili_push_owner = current_pid(); -#endif - /* - * Inode is left locked in shared mode. - * Pushbuf routine gets to unlock it. - */ - return XFS_ITEM_PUSHBUF; - } else { - /* - * We hold the AIL lock, so we must specify the - * NONOTIFY flag so that we won't double trip. - */ - xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); - return XFS_ITEM_FLUSHING; - } - /* NOTREACHED */ + return XFS_ITEM_PUSHBUF; } /* Stale items should force out the iclog */ if (ip->i_flags & XFS_ISTALE) { xfs_ifunlock(ip); + /* + * we hold the AIL lock - notify the unlock routine of this + * so it doesn't try to get the lock again. + */ xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); return XFS_ITEM_PINNED; } @@ -746,11 +733,8 @@ xfs_inode_item_committed( * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK * failed to get the inode flush lock but did get the inode locked SHARED. * Here we're trying to see if the inode buffer is incore, and if so whether it's - * marked delayed write. If that's the case, we'll initiate a bawrite on that - * buffer to expedite the process. - * - * We aren't holding the AIL lock (or the flush lock) when this gets called, - * so it is inherently race-y. + * marked delayed write. If that's the case, we'll promote it and that will + * allow the caller to write the buffer by triggering the xfsbufd to run. */ STATIC void xfs_inode_item_pushbuf( @@ -759,26 +743,16 @@ xfs_inode_item_pushbuf( xfs_inode_t *ip; xfs_mount_t *mp; xfs_buf_t *bp; - uint dopush; ip = iip->ili_inode; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); - /* - * The ili_pushbuf_flag keeps others from - * trying to duplicate our effort. - */ - ASSERT(iip->ili_pushbuf_flag != 0); - ASSERT(iip->ili_push_owner == current_pid()); - /* * If a flush is not in progress anymore, chances are that the * inode was taken off the AIL. So, just get out. */ if (completion_done(&ip->i_flush) || ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { - iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); return; } @@ -787,53 +761,12 @@ xfs_inode_item_pushbuf( bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, iip->ili_format.ilf_len, XBF_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - /* - * We were racing with iflush because we don't hold - * the AIL lock or the flush lock. However, at this point, - * we have the buffer, and we know that it's dirty. - * So, it's possible that iflush raced with us, and - * this item is already taken off the AIL. - * If not, we can flush it async. - */ - dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && - !completion_done(&ip->i_flush)); - iip->ili_pushbuf_flag = 0; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - trace_xfs_inode_item_push(bp, _RET_IP_); - - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0); - - if (dopush) { - int error; - error = xfs_bawrite(mp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", - error, iip, bp); - } else { - xfs_buf_relse(bp); - } - } else { - iip->ili_pushbuf_flag = 0; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - xfs_buf_relse(bp); - } - return; - } - /* - * We have to be careful about resetting pushbuf flag too early (above). - * Even though in theory we can do it as soon as we have the buflock, - * we don't want others to be doing work needlessly. They'll come to - * this function thinking that pushing the buffer is their - * responsibility only to find that the buffer is still locked by - * another doing the same thing - */ - iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (!bp) + return; + if (XFS_BUF_ISDELAYWRITE(bp)) + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); return; } @@ -937,7 +870,6 @@ xfs_inode_item_init( /* We have zeroed memory. No need ... iip->ili_extents_buf = NULL; - iip->ili_pushbuf_flag = 0; */ iip->ili_format.ilf_type = XFS_LI_INODE; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index cc8df1ac778..9a467958ecd 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -144,12 +144,6 @@ typedef struct xfs_inode_log_item { data exts */ struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged attr exts */ - unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ - -#ifdef DEBUG - uint64_t ili_push_owner; /* one who sets pushbuf_flag - above gets to push the buf */ -#endif #ifdef XFS_TRANS_DEBUG int ili_root_size; char *ili_orig_root; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ca64f33c63a..c93e3a10285 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -861,8 +861,7 @@ typedef struct xfs_item_ops { #define XFS_ITEM_SUCCESS 0 #define XFS_ITEM_PINNED 1 #define XFS_ITEM_LOCKED 2 -#define XFS_ITEM_FLUSHING 3 -#define XFS_ITEM_PUSHBUF 4 +#define XFS_ITEM_PUSHBUF 3 /* * This structure is used to maintain a list of block ranges that have been diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d7b1af8a832..e799824f724 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -253,6 +253,7 @@ xfsaild_push( int flush_log, count, stuck; xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; + int push_xfsbufd = 0; spin_lock(&ailp->xa_lock); xfs_trans_ail_cursor_init(ailp, cur); @@ -308,6 +309,7 @@ xfsaild_push( XFS_STATS_INC(xs_push_ail_pushbuf); IOP_PUSHBUF(lip); last_pushed_lsn = lsn; + push_xfsbufd = 1; break; case XFS_ITEM_PINNED: @@ -322,12 +324,6 @@ xfsaild_push( stuck++; break; - case XFS_ITEM_FLUSHING: - XFS_STATS_INC(xs_push_ail_flushing); - last_pushed_lsn = lsn; - stuck++; - break; - default: ASSERT(0); break; @@ -374,6 +370,11 @@ xfsaild_push( xfs_log_force(mp, 0); } + if (push_xfsbufd) { + /* we've got delayed write buffers to flush */ + wake_up_process(mp->m_ddev_targp->bt_task); + } + if (!count) { /* We're past our target or empty, so idle */ last_pushed_lsn = 0; -- cgit v1.2.3-70-g09d2 From 089716aa1480b7197bcd678b8477774c379a2768 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 26 Jan 2010 15:13:25 +1100 Subject: xfs: Sort delayed write buffers before dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently when the xfsbufd writes delayed write buffers, it pushes them to disk in the order they come off the delayed write list. If there are lots of buffers Ń•pread widely over the disk, this results in overwhelming the elevator sort queues in the block layer and we end up losing the posibility of merging adjacent buffers to minimise the number of IOs. Use the new generic list_sort function to sort the delwri dispatch queue before issue to ensure that the buffers are pushed in the most friendly order possible to the lower layers. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 87 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 27 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index b306265caa3..4556a4c31e3 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "xfs_sb.h" #include "xfs_inum.h" @@ -1877,14 +1878,42 @@ xfs_buf_delwri_split( } +/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values + */ +static int +xfs_buf_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); + xfs_daddr_t diff; + + diff = ap->b_bn - bp->b_bn; + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +void +xfs_buf_delwri_sort( + xfs_buftarg_t *target, + struct list_head *list) +{ + list_sort(NULL, list, xfs_buf_cmp); +} + STATIC int xfsbufd( void *data) { - struct list_head tmp; - xfs_buftarg_t *target = (xfs_buftarg_t *)data; - int count; - xfs_buf_t *bp; + xfs_buftarg_t *target = (xfs_buftarg_t *)data; current->flags |= PF_MEMALLOC; @@ -1893,6 +1922,8 @@ xfsbufd( do { long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + int count = 0; + struct list_head tmp; if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); @@ -1907,11 +1938,10 @@ xfsbufd( schedule_timeout_interruptible(tout); xfs_buf_delwri_split(target, &tmp, age); - count = 0; + list_sort(NULL, &tmp, xfs_buf_cmp); while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - ASSERT(target == bp->b_target); - + struct xfs_buf *bp; + bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); xfs_buf_iostrategy(bp); count++; @@ -1937,42 +1967,45 @@ xfs_flush_buftarg( xfs_buftarg_t *target, int wait) { - struct list_head tmp; - xfs_buf_t *bp, *n; + xfs_buf_t *bp; int pincount = 0; + LIST_HEAD(tmp_list); + LIST_HEAD(wait_list); xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); set_bit(XBT_FORCE_FLUSH, &target->bt_flags); - pincount = xfs_buf_delwri_split(target, &tmp, 0); + pincount = xfs_buf_delwri_split(target, &tmp_list, 0); /* - * Dropped the delayed write list lock, now walk the temporary list + * Dropped the delayed write list lock, now walk the temporary list. + * All I/O is issued async and then if we need to wait for completion + * we do that after issuing all the IO. */ - list_for_each_entry_safe(bp, n, &tmp, b_list) { + list_sort(NULL, &tmp_list, xfs_buf_cmp); + while (!list_empty(&tmp_list)) { + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); ASSERT(target == bp->b_target); - if (wait) + list_del_init(&bp->b_list); + if (wait) { bp->b_flags &= ~XBF_ASYNC; - else - list_del_init(&bp->b_list); - + list_add(&bp->b_list, &wait_list); + } xfs_buf_iostrategy(bp); } - if (wait) + if (wait) { + /* Expedite and wait for IO to complete. */ blk_run_address_space(target->bt_mapping); + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - /* - * Remaining list items must be flushed before returning - */ - while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - - list_del_init(&bp->b_list); - xfs_iowait(bp); - xfs_buf_relse(bp); + list_del_init(&bp->b_list); + xfs_iowait(bp); + xfs_buf_relse(bp); + } } return pincount; -- cgit v1.2.3-70-g09d2 From 5322892d867e186c6b4c5fff5c99ea4863696a60 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Feb 2010 10:09:14 +1100 Subject: xfs: kill xfs_bawrite There are no more users of this function left in the XFS code now that we've switched everything to delayed write flushing. Remove it. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 19 ------------------- fs/xfs/linux-2.6/xfs_buf.h | 1 - 2 files changed, 20 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 4556a4c31e3..d50df3a8101 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1078,25 +1078,6 @@ xfs_bwrite( return error; } -int -xfs_bawrite( - void *mp, - struct xfs_buf *bp) -{ - trace_xfs_buf_bawrite(bp, _RET_IP_); - - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - - xfs_buf_delwri_dequeue(bp); - - bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); - - bp->b_mount = mp; - bp->b_strat = xfs_bdstrat_cb; - return xfs_bdstrat_cb(bp); -} - void xfs_bdwrite( void *mp, diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index be45e8c5768..386e7361e50 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -233,7 +233,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); -extern int xfs_bawrite(void *mp, xfs_buf_t *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -- cgit v1.2.3-70-g09d2 From 73c77e2ccc14413c232c3e0b3aa43a0c4b72ec70 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 25 Jan 2010 11:42:24 -0600 Subject: xfs: fix xfs to work with Virtually Indexed architectures xfs_buf.c includes what is essentially a hand rolled version of blk_rq_map_kern(). In order to work properly with the vmalloc buffers that xfs uses, this hand rolled routine must also implement the flushing API for vmap/vmalloc areas. [style updates from hch@lst.de] Acked-by: Christoph Hellwig Signed-off-by: James Bottomley --- fs/xfs/linux-2.6/xfs_buf.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'fs/xfs/linux-2.6/xfs_buf.c') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 77b8be81c76..6f3ebb634b8 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -76,6 +76,27 @@ struct workqueue_struct *xfsconvertd_workqueue; #define xfs_buf_deallocate(bp) \ kmem_zone_free(xfs_buf_zone, (bp)); +static inline int +xfs_buf_is_vmapped( + struct xfs_buf *bp) +{ + /* + * Return true if the buffer is vmapped. + * + * The XBF_MAPPED flag is set if the buffer should be mapped, but the + * code is clever enough to know it doesn't have to map a single page, + * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. + */ + return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; +} + +static inline int +xfs_buf_vmap_len( + struct xfs_buf *bp) +{ + return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; +} + /* * Page Region interfaces. * @@ -314,7 +335,7 @@ xfs_buf_free( if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; - if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) + if (xfs_buf_is_vmapped(bp)) free_address(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { @@ -1107,6 +1128,9 @@ xfs_buf_bio_end_io( xfs_buf_ioerror(bp, -error); + if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + do { struct page *page = bvec->bv_page; @@ -1216,6 +1240,10 @@ next_chunk: submit_io: if (likely(bio->bi_size)) { + if (xfs_buf_is_vmapped(bp)) { + flush_kernel_vmap_range(bp->b_addr, + xfs_buf_vmap_len(bp)); + } submit_bio(rw, bio); if (size) goto next_chunk; -- cgit v1.2.3-70-g09d2