From b1d6cc02f2f6a590c4d8dc2c3bcf7be3b9419945 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 2 Oct 2014 09:17:58 +1000 Subject: xfs: compat_xfs_bstat does not have forkoff struct compat_xfs_bstat is missing the di_forkoff field and so does not fully translate the structure correctly. Fix it. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_ioctl32.c | 2 ++ fs/xfs/xfs_ioctl32.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index a554646ff14..94ce027e28e 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -160,6 +160,7 @@ xfs_ioctl32_bstat_copyin( get_user(bstat->bs_gen, &bstat32->bs_gen) || get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || + get_user(bstat->bs_forkoff, &bstat32->bs_forkoff) || get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || get_user(bstat->bs_aextents, &bstat32->bs_aextents)) @@ -214,6 +215,7 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_gen, &p32->bs_gen) || put_user(buffer->bs_projid, &p32->bs_projid) || put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || + put_user(buffer->bs_forkoff, &p32->bs_forkoff) || put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 80f4060e897..b1bb45444df 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -67,8 +67,9 @@ typedef struct compat_xfs_bstat { __u32 bs_gen; /* generation count */ __u16 bs_projid_lo; /* lower part of project id */ #define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_forkoff; /* inode fork offset in bytes */ __u16 bs_projid_hi; /* high part of project id */ - unsigned char bs_pad[12]; /* pad space, unused */ + unsigned char bs_pad[10]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ -- cgit v1.2.3-70-g09d2 From e076b0f3a5c472e77c0a0e163188f2761e8b4fed Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 2 Oct 2014 09:18:13 +1000 Subject: xfs: kill time.h The typedef for timespecs and nanotime() are completely unnecessary, and delay() can be moved to fs/xfs/linux.h, which means this file can go away. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/kmem.c | 1 - fs/xfs/time.h | 36 ------------------------------------ fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_linux.h | 6 +++++- fs/xfs/xfs_trans_inode.c | 2 +- 5 files changed, 8 insertions(+), 41 deletions(-) delete mode 100644 fs/xfs/time.h (limited to 'fs') diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 844e288b957..53e95b2a136 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -21,7 +21,6 @@ #include #include #include -#include "time.h" #include "kmem.h" #include "xfs_message.h" diff --git a/fs/xfs/time.h b/fs/xfs/time.h deleted file mode 100644 index 387e695a184..00000000000 --- a/fs/xfs/time.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_TIME_H__ -#define __XFS_SUPPORT_TIME_H__ - -#include -#include - -typedef struct timespec timespec_t; - -static inline void delay(long ticks) -{ - schedule_timeout_uninterruptible(ticks); -} - -static inline void nanotime(struct timespec *tvp) -{ - *tvp = CURRENT_TIME; -} - -#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c92cb48617d..4c130ffcf0c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -654,7 +654,7 @@ xfs_ialloc( xfs_inode_t *ip; uint flags; int error; - timespec_t tv; + struct timespec tv; /* * Call the space management code to pick @@ -720,7 +720,7 @@ xfs_ialloc( ip->i_d.di_nextents = 0; ASSERT(ip->i_d.di_nblocks == 0); - nanotime(&tv); + tv = current_fs_time(mp->m_super); ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; ip->i_d.di_atime = ip->i_d.di_mtime; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index d10dc8f397c..6a51619d869 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -56,7 +56,6 @@ typedef __uint64_t __psunsigned_t; #include "kmem.h" #include "mrlock.h" -#include "time.h" #include "uuid.h" #include @@ -179,6 +178,11 @@ typedef __uint64_t __psunsigned_t; #define MAX(a,b) (max(a,b)) #define howmany(x, y) (((x)+((y)-1))/(y)) +static inline void delay(long ticks) +{ + schedule_timeout_uninterruptible(ticks); +} + /* * XFS wrapper structure for sysfs support. It depends on external data * structures and is embedded in various internal data structures to implement diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 50c3f561428..cdb4d86520e 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -70,7 +70,7 @@ xfs_trans_ichgtime( int flags) { struct inode *inode = VFS_I(ip); - timespec_t tv; + struct timespec tv; ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -- cgit v1.2.3-70-g09d2 From 9336e3a765b68d4a7fdd8256f393ebce95ecb0a7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 2 Oct 2014 09:18:40 +1000 Subject: xfs: project id inheritance is a directory only flag xfs_set_diflags() allows it to be set on non-directory inodes, and this flags errors in xfs_repair. Further, inode allocation allows the same directory-only flag to be inherited to non-directories. Make sure directory inode flags don't appear on other types of inodes. This fixes several xfstests scratch fileystem corruption reports (e.g. xfs/050) now that xfstests checks scratch filesystems after test completion. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_ioctl.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4c130ffcf0c..2f63742a25f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -769,6 +769,8 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_EXTSZINHERIT; ip->i_d.di_extsize = pip->i_d.di_extsize; } + if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; } else if (S_ISREG(mode)) { if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) di_flags |= XFS_DIFLAG_REALTIME; @@ -789,8 +791,6 @@ xfs_ialloc( if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) && xfs_inherit_nosymlinks) di_flags |= XFS_DIFLAG_NOSYMLINKS; - if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) && xfs_inherit_nodefrag) di_flags |= XFS_DIFLAG_NODEFRAG; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3799695b924..05a19554efb 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -968,8 +968,6 @@ xfs_set_diflags( di_flags |= XFS_DIFLAG_NOATIME; if (xflags & XFS_XFLAG_NODUMP) di_flags |= XFS_DIFLAG_NODUMP; - if (xflags & XFS_XFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; if (xflags & XFS_XFLAG_NODEFRAG) di_flags |= XFS_DIFLAG_NODEFRAG; if (xflags & XFS_XFLAG_FILESTREAM) @@ -981,6 +979,8 @@ xfs_set_diflags( di_flags |= XFS_DIFLAG_NOSYMLINKS; if (xflags & XFS_XFLAG_EXTSZINHERIT) di_flags |= XFS_DIFLAG_EXTSZINHERIT; + if (xflags & XFS_XFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; } else if (S_ISREG(ip->i_d.di_mode)) { if (xflags & XFS_XFLAG_REALTIME) di_flags |= XFS_DIFLAG_REALTIME; -- cgit v1.2.3-70-g09d2 From a872703f34cd6033d0b174fa598f63f1a57145bb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 2 Oct 2014 09:20:30 +1000 Subject: xfs: only set extent size hint when asked Currently the extent size hint is set unconditionally in xfs_ioctl_setattr() when the FSX_EXTSIZE flag is set. Hence we can set hints when the inode flags indicating the hint should be used are not set. Hence only set the extent size hint from userspace when the inode has the XFS_DIFLAG_EXTSIZE flag set to indicate that we should have an extent size hint set on the inode. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_ioctl.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 05a19554efb..d6afc9f144b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1231,13 +1231,25 @@ xfs_ioctl_setattr( } - if (mask & FSX_EXTSIZE) - ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; if (mask & FSX_XFLAGS) { xfs_set_diflags(ip, fa->fsx_xflags); xfs_diflags_to_linux(ip); } + /* + * Only set the extent size hint if we've already determined that the + * extent size hint should be set on the inode. If no extent size flags + * are set on the inode then unconditionally clear the extent size hint. + */ + if (mask & FSX_EXTSIZE) { + int extsize = 0; + + if (ip->i_d.di_flags & + (XFS_DIFLAG_EXTSIZE | XFS_DIFLAG_EXTSZINHERIT)) + extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; + ip->i_d.di_extsize = extsize; + } + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -- cgit v1.2.3-70-g09d2 From ce57bcf6b81caf1e9f780e98e8d23d3555746d74 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 2 Oct 2014 09:21:53 +1000 Subject: xfs: check for inode size overflow in xfs_new_eof() If we write to the maximum file offset (2^63-2), XFS fails to log the inode size update when the page is flushed. For example: $ xfs_io -fc "pwrite `echo "2^63-1-1" | bc` 1" /mnt/file wrote 1/1 bytes at offset 9223372036854775806 1.000000 bytes, 1 ops; 0.0000 sec (22.711 KiB/sec and 23255.8140 ops/sec) $ stat -c %s /mnt/file 9223372036854775807 $ umount /mnt ; mount /mnt/ $ stat -c %s /mnt/file 0 This occurs because XFS calculates the new file size as io_offset + io_size, I/O occurs in block sized requests, and the maximum supported file size is not block aligned. Therefore, a write to the max allowable offset on a 4k blocksize fs results in a write of size 4k to offset 2^63-4096 (e.g., equivalent to round_down(2^63-1, 4096), or IOW the offset of the block that contains the max file size). The offset plus size calculation (2^63 - 4096 + 4096 == 2^63) overflows the signed 64-bit variable which goes negative and causes the > comparison to the on-disk inode size to fail. This returns 0 from xfs_new_eof() and results in no change to the inode on-disk. Update xfs_new_eof() to explicitly detect overflow of the local calculation and use the VFS inode size in this scenario. The VFS inode size is capped to the maximum and thus XFS writes the correct inode size to disk. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c10e3fadd9a..9af2882e1f4 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -102,7 +102,7 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size) { xfs_fsize_t i_size = i_size_read(VFS_I(ip)); - if (new_size > i_size) + if (new_size > i_size || new_size < 0) new_size = i_size; return new_size > ip->i_d.di_size ? new_size : 0; } -- cgit v1.2.3-70-g09d2 From 6ee49a20c13b4b4e79a3bba406df8106cff284a1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Oct 2014 09:23:49 +1000 Subject: xfs: don't send null bp to xfs_trans_brelse() In this case, if bp is NULL, error is set, and we send a NULL bp to xfs_trans_brelse, which will try to dereference it. Test whether we actually have a buffer before we try to free it. Coverity spotted this. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_da_btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 2c42ae28d02..fd827530afe 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2563,7 +2563,8 @@ xfs_da_get_buf( mapp, nmap, 0); error = bp ? bp->b_error : -EIO; if (error) { - xfs_trans_brelse(trans, bp); + if (bp) + xfs_trans_brelse(trans, bp); goto out_free; } -- cgit v1.2.3-70-g09d2 From 04dd1a0d4b17a71220eae4fb313218f15a49bcdd Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Oct 2014 09:24:11 +1000 Subject: xfs: fix crc field handling in xfs_sb_to/from_disk I discovered this in userspace, but the same change applies to the kernel. If we xfs_mdrestore an image from a non-crc filesystem, lo and behold the restored image has gained a CRC: # db/xfs_metadump.sh -o /dev/sdc1 - | xfs_mdrestore - test.img # xfs_db -c "sb 0" -c "p crc" /dev/sdc1 crc = 0 (correct) # xfs_db -c "sb 0" -c "p crc" test.img crc = 0xb6f8d6a0 (correct) This is because xfs_sb_from_disk doesn't fill in sb_crc, but xfs_sb_to_disk(XFS_SB_ALL_BITS) does write the in-memory CRC to disk - so we get uninitialized memory on disk. Fix this by always initializing sb_crc to 0 when we read the superblock, and masking out the CRC bit from ALL_BITS when we write it. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_sb.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 8426e5e2682..5f902fa7913 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -445,6 +445,8 @@ __xfs_sb_from_disk( to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat); to->sb_features_log_incompat = be32_to_cpu(from->sb_features_log_incompat); + /* crc is only used on disk, not in memory; just init to 0 here. */ + to->sb_crc = 0; to->sb_pad = 0; to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); @@ -550,6 +552,9 @@ xfs_sb_to_disk( if (!fields) return; + /* We should never write the crc here, it's updated in the IO path */ + fields &= ~XFS_SB_CRC; + xfs_sb_quota_to_disk(to, from, &fields); while (fields) { f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); -- cgit v1.2.3-70-g09d2 From 5cca3f611d159e5a4a5ec60413bd09948ef40aea Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Oct 2014 09:27:09 +1000 Subject: xfs: check for null dquot in xfs_quota_calc_throttle() Coverity spotted this. Granted, we *just* checked xfs_inod_dquot() in the caller (by calling xfs_quota_need_throttle). However, this is the only place we don't check the return value but the check is cheap and future-proof so add it. Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index e9c47b6f5e5..afcf3c92656 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -404,8 +404,8 @@ xfs_quota_calc_throttle( int shift = 0; struct xfs_dquot *dq = xfs_inode_dquot(ip, type); - /* over hi wmark, squash the prealloc completely */ - if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { + /* no dq, or over hi wmark, squash the prealloc completely */ + if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { *qblocks = 0; *qfreesp = 0; return; -- cgit v1.2.3-70-g09d2 From 07d08681d26e99d8ba3bc4e56380f2cc04d3ff3b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 2 Oct 2014 09:42:06 +1000 Subject: xfs: restore buffer_head unwritten bit on ioend cancel xfs_vm_writepage() walks each buffer_head on the page, maps to the block on disk and attaches to a running ioend structure that represents the I/O submission. A new ioend is created when the type of I/O (unwritten, delayed allocation or overwrite) required for a particular buffer_head differs from the previous. If a buffer_head is a delalloc or unwritten buffer, the associated bits are cleared by xfs_map_at_offset() once the buffer_head is added to the ioend. The process of mapping each buffer_head occurs in xfs_map_blocks() and acquires the ilock in blocking or non-blocking mode, depending on the type of writeback in progress. If the lock cannot be acquired for non-blocking writeback, we cancel the ioend, redirty the page and return. Writeback will revisit the page at some later point. Note that we acquire the ilock for each buffer on the page. Therefore during non-blocking writeback, it is possible to add an unwritten buffer to the ioend, clear the unwritten state, fail to acquire the ilock when mapping a subsequent buffer and cancel the ioend. If this occurs, the unwritten status of the buffer sitting in the ioend has been lost. The page will eventually hit writeback again, but xfs_vm_writepage() submits overwrite I/O instead of unwritten I/O and does not perform unwritten extent conversion at I/O completion. This leads to data corruption because unwritten extents are treated as holes on reads and zeroes are returned instead of reading from disk. Modify xfs_cancel_ioend() to restore the buffer unwritten bit for ioends of type XFS_IO_UNWRITTEN. This ensures that unwritten extent conversion occurs once the page is eventually written back. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_aops.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11e9b4caa54..dc3e10882d1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -548,6 +548,13 @@ xfs_cancel_ioend( do { next_bh = bh->b_private; clear_buffer_async_write(bh); + /* + * The unwritten flag is cleared when added to the + * ioend. We're not submitting for I/O so mark the + * buffer unwritten again for next time around. + */ + if (ioend->io_type == XFS_IO_UNWRITTEN) + set_buffer_unwritten(bh); unlock_buffer(bh); } while ((bh = next_bh) != NULL); -- cgit v1.2.3-70-g09d2 From da5f10969d54006a24777a84ed3eaeeb2a21047f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 2 Oct 2014 09:44:54 +1000 Subject: xfs: flush the range before zero range conversion XFS currently discards delalloc blocks within the target range of a zero range request. Unaligned start and end offsets are zeroed through the page cache and the internal, aligned blocks are converted to unwritten extents. If EOF is page aligned and covered by a delayed allocation extent. The inode size is not updated until I/O completion. If a zero range request discards a delalloc range that covers page aligned EOF as such, the inode size update never occurs. For example: $ rm -f /mnt/file $ xfs_io -fc "pwrite 0 64k" -c "zero 60k 4k" /mnt/file $ stat -c "%s" /mnt/file 65536 $ umount /mnt $ mount /mnt $ stat -c "%s" /mnt/file 61440 Update xfs_zero_file_space() to flush the range rather than discard delalloc blocks to ensure that inode size updates occur appropriately. [dchinner: Note that this is really a workaround to avoid the underlying problems. More work is needed (and ongoing) to fix those issues so this fix is being added as a temporary stop-gap measure. ] Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_bmap_util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 1cb345e7c18..6f5cb63bed1 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1392,14 +1392,14 @@ xfs_zero_file_space( if (start_boundary < end_boundary - 1) { /* - * punch out delayed allocation blocks and the page cache over - * the conversion range + * Writeback the range to ensure any inode size updates due to + * appending writes make it to disk (otherwise we could just + * punch out the delalloc blocks). */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_bmap_punch_delalloc_range(ip, - XFS_B_TO_FSBT(mp, start_boundary), - XFS_B_TO_FSB(mp, end_boundary - start_boundary)); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + start_boundary, end_boundary - 1); + if (error) + goto out; truncate_pagecache_range(VFS_I(ip), start_boundary, end_boundary - 1); -- cgit v1.2.3-70-g09d2 From 52177937e9ac4573391143065b250403d3a6ae4b Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Fri, 3 Oct 2014 09:09:50 +1000 Subject: xfs: xfs_iflush_done checks the wrong log item callback Commit 3013683 ("xfs: remove all the inodes on a buffer from the AIL in bulk") made the xfs inode flush callback more efficient by combining all the inode writes on the buffer and the deletions of the inode log item from AIL. The initial loop in this patch should be looping through all the log items on the buffer to see which items have xfs_iflush_done as their callback function. But currently, only the log item passed to the function has its callback compared to xfs_iflush_done. If the log item pointer passed to the function does have the xfs_iflush_done callback function, then all the log items on the buffer are removed from the li_bio_list on the buffer b_fspriv and could be removed from the AIL even though they may have not been written yet. This problem is masked by the fact that currently all inodes on a buffer will have the same calback function - either xfs_iflush_done or xfs_istale_done - and hence the bug cannot manifest in any way. Still, we need to remove the landmine so that if we add new callbacks in future this doesn't cause us problems. Signed-off-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode_item.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index de5a7be36e6..63de0b0acc3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -615,7 +615,7 @@ xfs_iflush_done( blip = bp->b_fspriv; prev = NULL; while (blip != NULL) { - if (lip->li_cb != xfs_iflush_done) { + if (blip->li_cb != xfs_iflush_done) { prev = blip; blip = blip->li_bio_list; continue; -- cgit v1.2.3-70-g09d2 From a8b1ee8bafc765ebf029d03c5479a69aebff9693 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 13 Oct 2014 10:21:53 +1100 Subject: xfs: fix agno increment in xfs_inumbers() loop caused a regression in xfs_inumbers, which in turn broke xfsdump, causing incomplete dumps. The loop in xfs_inumbers() needs to fill the user-supplied buffers, and iterates via xfs_btree_increment, reading new ags as needed. But the first time through the loop, if xfs_btree_increment() succeeds, we continue, which triggers the ++agno at the bottom of the loop, and we skip to soon to the next ag - without the proper setup under next_ag to read the next ag. Fix this by removing the agno increment from the loop conditional, and only increment agno if we have actually hit the code under the next_ag: target. Cc: stable@vger.kernel.org Signed-off-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_itable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f71be9c6801..f1deb961a29 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -639,7 +639,8 @@ next_ag: xfs_buf_relse(agbp); agbp = NULL; agino = 0; - } while (++agno < mp->m_sb.sb_agcount); + agno++; + } while (agno < mp->m_sb.sb_agcount); if (!error) { if (bufidx) { -- cgit v1.2.3-70-g09d2