summaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorJames Morris <jmorris@namei.org>2011-01-10 09:46:24 +1100
committerJames Morris <jmorris@namei.org>2011-01-10 09:46:24 +1100
commitd2e7ad19229f982fc1eb731827d82ceac90abfb3 (patch)
tree98a3741b4d4b27a48b3c7ea9babe331e539416a8 /fs/xfs
parentd03a5d888fb688c832d470b749acc5ed38e0bc1d (diff)
parent0c21e3aaf6ae85bee804a325aa29c325209180fd (diff)
Merge branch 'master' into next
Conflicts: security/smack/smack_lsm.c Verified and added fix by Stephen Rothwell <sfr@canb.auug.org.au> Ok'd by Casey Schaufler <casey@schaufler-ca.com> Signed-off-by: James Morris <jmorris@namei.org>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c94
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c35
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--fs/xfs/xfs_bmap.c85
-rw-r--r--fs/xfs/xfs_bmap.h5
-rw-r--r--fs/xfs/xfs_dfrag.c13
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_error.h5
-rw-r--r--fs/xfs/xfs_iget.c13
-rw-r--r--fs/xfs/xfs_inode_item.c31
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_rename.c1
13 files changed, 212 insertions, 88 deletions
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3..39f4f809bb6 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
int
-xfs_check_acl(struct inode *inode, int mask)
+xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
- struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_inode *ip;
struct posix_acl *acl;
int error = -EAGAIN;
+ ip = XFS_I(inode);
trace_xfs_check_acl(ip);
/*
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
if (!XFS_IFORK_Q(ip))
return -EAGAIN;
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+
acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7d287afccde..691f61223ed 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -934,7 +934,6 @@ xfs_aops_discard_page(
struct xfs_inode *ip = XFS_I(inode);
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
- ssize_t len = 1 << inode->i_blkbits;
if (!xfs_is_delayed_page(page, IO_DELAY))
goto out_invalidate;
@@ -949,58 +948,14 @@ xfs_aops_discard_page(
xfs_ilock(ip, XFS_ILOCK_EXCL);
bh = head = page_buffers(page);
do {
- int done;
- xfs_fileoff_t offset_fsb;
- xfs_bmbt_irec_t imap;
- int nimaps = 1;
int error;
- xfs_fsblock_t firstblock;
- xfs_bmap_free_t flist;
+ xfs_fileoff_t start_fsb;
if (!buffer_delay(bh))
goto next_buffer;
- offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-
- /*
- * Map the range first and check that it is a delalloc extent
- * before trying to unmap the range. Otherwise we will be
- * trying to remove a real extent (which requires a
- * transaction) or a hole, which is probably a bad idea...
- */
- error = xfs_bmapi(NULL, ip, offset_fsb, 1,
- XFS_BMAPI_ENTIRE, NULL, 0, &imap,
- &nimaps, NULL);
-
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
- "page discard failed delalloc mapping lookup.");
- }
- break;
- }
- if (!nimaps) {
- /* nothing there */
- goto next_buffer;
- }
- if (imap.br_startblock != DELAYSTARTBLOCK) {
- /* been converted, ignore */
- goto next_buffer;
- }
- WARN_ON(imap.br_blockcount == 0);
-
- /*
- * Note: while we initialise the firstblock/flist pair, they
- * should never be used because blocks should never be
- * allocated or freed for a delalloc extent and hence we need
- * don't cancel or finish them after the xfs_bunmapi() call.
- */
- xfs_bmap_init(&flist, &firstblock);
- error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
- &flist, &done);
-
- ASSERT(!flist.xbf_count && !flist.xbf_first);
+ start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+ error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
if (error) {
/* something screwed, just bail */
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +965,7 @@ xfs_aops_discard_page(
break;
}
next_buffer:
- offset += len;
+ offset += 1 << inode->i_blkbits;
} while ((bh = bh->b_this_page) != head);
@@ -1505,11 +1460,42 @@ xfs_vm_write_failed(
struct inode *inode = mapping->host;
if (to > inode->i_size) {
- struct iattr ia = {
- .ia_valid = ATTR_SIZE | ATTR_FORCE,
- .ia_size = inode->i_size,
- };
- xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+ /*
+ * punch out the delalloc blocks we have already allocated. We
+ * don't call xfs_setattr() to do this as we may be in the
+ * middle of a multi-iovec write and so the vfs inode->i_size
+ * will not match the xfs ip->i_size and so it will zero too
+ * much. Hence we jus truncate the page cache to zero what is
+ * necessary and punch the delalloc blocks directly.
+ */
+ struct xfs_inode *ip = XFS_I(inode);
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t end_fsb;
+ int error;
+
+ truncate_pagecache(inode, to, inode->i_size);
+
+ /*
+ * Check if there are any blocks that are outside of i_size
+ * that need to be trimmed back.
+ */
+ start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+ end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+ if (end_fsb <= start_fsb)
+ return;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+ end_fsb - start_fsb);
+ if (error) {
+ /* something screwed, just bail */
+ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "xfs_vm_write_failed: unable to clean up ino %lld",
+ ip->i_ino);
+ }
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aa1d353def2..4c5deb6e9e3 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -488,29 +488,16 @@ found:
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
- /* Attempt to get the semaphore without sleeping,
- * if this does not work then we need to drop the
- * spinlock and do a hard attempt on the semaphore.
- */
- if (down_trylock(&bp->b_sema)) {
+ if (xfs_buf_cond_lock(bp)) {
+ /* failed, so wait for the lock if requested. */
if (!(flags & XBF_TRYLOCK)) {
- /* wait for buffer ownership */
xfs_buf_lock(bp);
XFS_STATS_INC(xb_get_locked_waited);
} else {
- /* We asked for a trylock and failed, no need
- * to look at file offset and length here, we
- * know that this buffer at least overlaps our
- * buffer and is locked, therefore our buffer
- * either does not exist, or is this buffer.
- */
xfs_buf_rele(bp);
XFS_STATS_INC(xb_busy_locked);
return NULL;
}
- } else {
- /* trylock worked */
- XB_SET_OWNER(bp);
}
if (bp->b_flags & XBF_STALE) {
@@ -876,10 +863,18 @@ xfs_buf_rele(
*/
/*
- * Locks a buffer object, if it is not already locked.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
+ * Locks a buffer object, if it is not already locked. Note that this in
+ * no way locks the underlying pages, so it is only useful for
+ * synchronizing concurrent use of buffer objects, not for synchronizing
+ * independent access to the underlying pages.
+ *
+ * If we come across a stale, pinned, locked buffer, we know that we are
+ * being asked to lock a buffer that has been reallocated. Because it is
+ * pinned, we know that the log has not been pushed to disk and hence it
+ * will still be locked. Rather than continuing to have trylock attempts
+ * fail until someone else pushes the log, push it ourselves before
+ * returning. This means that the xfsaild will not get stuck trying
+ * to push on stale inode buffers.
*/
int
xfs_buf_cond_lock(
@@ -890,6 +885,8 @@ xfs_buf_cond_lock(
locked = down_trylock(&bp->b_sema) == 0;
if (locked)
XB_SET_OWNER(bp);
+ else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+ xfs_log_force(bp->b_target->bt_mount, 0);
trace_xfs_buf_cond_lock(bp, _RET_IP_);
return locked ? 0 : -EBUSY;
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0135e2a669d..11dd72070cb 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
#ifdef CONFIG_XFS_POSIX_ACL
-extern int xfs_check_acl(struct inode *inode, int mask);
+extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
extern int xfs_acl_chmod(struct inode *inode);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8abd12e32e1..4111cd3966c 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
if (error)
goto out_unlock_iolock;
}
-
- ASSERT(ip->i_delayed_blks == 0);
+ /*
+ * even after flushing the inode, there can still be delalloc
+ * blocks on the inode beyond EOF due to speculative
+ * preallocation. These are not removed until the release
+ * function is called or the inode is inactivated. Hence we
+ * cannot assert here that ip->i_delayed_blks == 0.
+ */
}
lock = xfs_ilock_map_shared(ip);
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves(
*count += xfs_bmbt_disk_get_blockcount(frp);
}
}
+
+/*
+ * dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. Walks a block at a time so will be slow, but is only executed in
+ * rare error cases so the overhead is not critical. This will alays punch out
+ * both the start and end blocks, even if the ranges only partially overlap
+ * them, so it is up to the caller to ensure that partial blocks are not
+ * passed in.
+ */
+int
+xfs_bmap_punch_delalloc_range(
+ struct xfs_inode *ip,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t length)
+{
+ xfs_fileoff_t remaining = length;
+ int error = 0;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ do {
+ int done;
+ xfs_bmbt_irec_t imap;
+ int nimaps = 1;
+ xfs_fsblock_t firstblock;
+ xfs_bmap_free_t flist;
+
+ /*
+ * Map the range first and check that it is a delalloc extent
+ * before trying to unmap the range. Otherwise we will be
+ * trying to remove a real extent (which requires a
+ * transaction) or a hole, which is probably a bad idea...
+ */
+ error = xfs_bmapi(NULL, ip, start_fsb, 1,
+ XFS_BMAPI_ENTIRE, NULL, 0, &imap,
+ &nimaps, NULL);
+
+ if (error) {
+ /* something screwed, just bail */
+ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+ "Failed delalloc mapping lookup ino %lld fsb %lld.",
+ ip->i_ino, start_fsb);
+ }
+ break;
+ }
+ if (!nimaps) {
+ /* nothing there */
+ goto next_block;
+ }
+ if (imap.br_startblock != DELAYSTARTBLOCK) {
+ /* been converted, ignore */
+ goto next_block;
+ }
+ WARN_ON(imap.br_blockcount == 0);
+
+ /*
+ * Note: while we initialise the firstblock/flist pair, they
+ * should never be used because blocks should never be
+ * allocated or freed for a delalloc extent and hence we need
+ * don't cancel or finish them after the xfs_bunmapi() call.
+ */
+ xfs_bmap_init(&flist, &firstblock);
+ error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
+ &flist, &done);
+ if (error)
+ break;
+
+ ASSERT(!flist.xbf_count && !flist.xbf_first);
+next_block:
+ start_fsb++;
+ remaining--;
+ } while(remaining > 0);
+
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 71ec9b6ecdf..3651191daea 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
int whichfork,
int *count);
+int
+xfs_bmap_punch_delalloc_range(
+ struct xfs_inode *ip,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t length);
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c60a2..e60490bc00a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
ip->i_d.di_format = tip->i_d.di_format;
tip->i_d.di_format = tmp;
+ /*
+ * The extents in the source inode could still contain speculative
+ * preallocation beyond EOF (e.g. the file is open but not modified
+ * while defrag is in progress). In that case, we need to copy over the
+ * number of delalloc blocks the data fork in the source inode is
+ * tracking beyond EOF so that when the fork is truncated away when the
+ * temporary inode is unlinked we don't underrun the i_delayed_blks
+ * counter on that inode.
+ */
+ ASSERT(tip->i_delayed_blks == 0);
+ tip->i_delayed_blks = ip->i_delayed_blks;
+ ip->i_delayed_blks = 0;
+
ilf_fields = XFS_ILOG_CORE;
switch(ip->i_d.di_format) {
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed999026766..c78cc6a3d87 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,6 +58,7 @@ xfs_error_trap(int e)
int xfs_etest[XFS_NUM_INJECT_ERROR];
int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
+int xfs_error_test_active;
int
xfs_error_test(int error_tag, int *fsidp, char *expression,
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
len = strlen(mp->m_fsname);
xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
strcpy(xfs_etest_fsname[i], mp->m_fsname);
+ xfs_error_test_active++;
return 0;
}
}
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
xfs_etest_fsid[i] = 0LL;
kmem_free(xfs_etest_fsname[i]);
xfs_etest_fsname[i] = NULL;
+ xfs_error_test_active--;
}
}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c2c1a072bb8..f338847f80b 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level,
#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
#ifdef DEBUG
+extern int xfs_error_test_active;
extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
#define XFS_NUM_INJECT_ERROR 10
#define XFS_TEST_ERROR(expr, mp, tag, rf) \
- ((expr) || \
+ ((expr) || (xfs_error_test_active && \
xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
- (rf)))
+ (rf))))
extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd26932d8..d7de5a3f786 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,6 +91,17 @@ xfs_inode_alloc(
return ip;
}
+STATIC void
+xfs_inode_free_callback(
+ struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct xfs_inode *ip = XFS_I(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_zone_free(xfs_inode_zone, ip);
+}
+
void
xfs_inode_free(
struct xfs_inode *ip)
@@ -134,7 +145,7 @@ xfs_inode_free(
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
- kmem_zone_free(xfs_inode_zone, ip);
+ call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
}
/*
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c7ac020705d..7c8d30c453c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -657,18 +657,37 @@ xfs_inode_item_unlock(
}
/*
- * This is called to find out where the oldest active copy of the
- * inode log item in the on disk log resides now that the last log
- * write of it completed at the given lsn. Since we always re-log
- * all dirty data in an inode, the latest copy in the on disk log
- * is the only one that matters. Therefore, simply return the
- * given lsn.
+ * This is called to find out where the oldest active copy of the inode log
+ * item in the on disk log resides now that the last log write of it completed
+ * at the given lsn. Since we always re-log all dirty data in an inode, the
+ * latest copy in the on disk log is the only one that matters. Therefore,
+ * simply return the given lsn.
+ *
+ * If the inode has been marked stale because the cluster is being freed, we
+ * don't want to (re-)insert this inode into the AIL. There is a race condition
+ * where the cluster buffer may be unpinned before the inode is inserted into
+ * the AIL during transaction committed processing. If the buffer is unpinned
+ * before the inode item has been committed and inserted, then it is possible
+ * for the buffer to be written and IO completions before the inode is inserted
+ * into the AIL. In that case, we'd be inserting a clean, stale inode into the
+ * AIL which will never get removed. It will, however, get reclaimed which
+ * triggers an assert in xfs_inode_free() complaining about freein an inode
+ * still in the AIL.
+ *
+ * To avoid this, return a lower LSN than the one passed in so that the
+ * transaction committed code will not move the inode forward in the AIL but
+ * will still unpin it properly.
*/
STATIC xfs_lsn_t
xfs_inode_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
+
+ if (xfs_iflags_test(ip, XFS_ISTALE))
+ return lsn - 1;
return lsn;
}
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 45ce15dc5b2..edfa178bafb 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -408,7 +408,7 @@ xfs_mru_cache_flush(
spin_lock(&mru->lock);
if (mru->queued) {
spin_unlock(&mru->lock);
- cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+ cancel_delayed_work_sync(&mru->work);
spin_lock(&mru->lock);
}
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d2af0a8381a..77a59891734 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -297,6 +297,7 @@ xfs_rename(
* it and some incremental backup programs won't work without it.
*/
xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
/*
* Adjust the link count on src_dp. This is necessary when