From 20ad9ea9becd34a3c16252ca9d815f2c74f8f30f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 Feb 2011 12:06:34 +0000 Subject: xfs: enable delaylog by default Signed-off-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs/linux-2.6/xfs_super.c') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9731898083a..7ec1fb8c131 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -189,6 +189,7 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + mp->m_flags |= XFS_MOUNT_DELAYLOG; /* * These can be overridden by the mount option parsing. -- cgit v1.2.3-70-g09d2 From 4f10700a2e4bb2ff3d3a80f08412e21109e6d4b5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 7 Mar 2011 10:00:35 +1100 Subject: xfs: Convert linux-2.6/ files to new logging interface Convert the files in fs/xfs/linux-2.6/ to use the new xfs_ logging format that replaces the old Irix inherited cmn_err() interfaces. While there, also convert naked printk calls to use the relevant xfs logging function to standardise output format. Signed-off-by: Dave Chinner Reviewed-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/kmem.c | 9 +-- fs/xfs/linux-2.6/xfs_aops.c | 6 +- fs/xfs/linux-2.6/xfs_buf.c | 17 +++--- fs/xfs/linux-2.6/xfs_super.c | 127 +++++++++++++++++++----------------------- fs/xfs/linux-2.6/xfs_sync.c | 5 +- fs/xfs/linux-2.6/xfs_sysctl.c | 2 +- 6 files changed, 76 insertions(+), 90 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_super.c') diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 666c9db48eb..a907de565db 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -23,6 +23,7 @@ #include #include "time.h" #include "kmem.h" +#include "xfs_message.h" /* * Greedy allocation. May fail and may return vmalloced memory. @@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags) if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) - printk(KERN_ERR "XFS: possible memory allocation " - "deadlock in %s (mode:0x%x)\n", + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); @@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) - printk(KERN_ERR "XFS: possible memory allocation " - "deadlock in %s (mode:0x%x)\n", + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index ec7bbb5645b..8c5c8727745 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -854,7 +854,7 @@ xfs_aops_discard_page( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) goto out_invalidate; - xfs_fs_cmn_err(CE_ALERT, ip->i_mount, + xfs_alert(ip->i_mount, "page discard on page %p, inode 0x%llx, offset %llu.", page, ip->i_ino, offset); @@ -872,7 +872,7 @@ xfs_aops_discard_page( if (error) { /* something screwed, just bail */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_fs_cmn_err(CE_ALERT, ip->i_mount, + xfs_alert(ip->i_mount, "page discard unable to remove delalloc mapping."); } break; @@ -1411,7 +1411,7 @@ xfs_vm_write_failed( if (error) { /* something screwed, just bail */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_fs_cmn_err(CE_ALERT, ip->i_mount, + xfs_alert(ip->i_mount, "xfs_vm_write_failed: unable to clean up ino %lld", ip->i_ino); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ac1c7e8378d..3cc671c8a67 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -401,9 +401,8 @@ _xfs_buf_lookup_pages( * handle buffer allocation failures we can't do much. */ if (!(++retries % 100)) - printk(KERN_ERR - "XFS: possible memory allocation " - "deadlock in %s (mode:0x%x)\n", + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", __func__, gfp_mask); XFS_STATS_INC(xb_page_retries); @@ -615,8 +614,8 @@ xfs_buf_get( if (!(bp->b_flags & XBF_MAPPED)) { error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { - printk(KERN_WARNING "%s: failed to map pages\n", - __func__); + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); goto no_buffer; } } @@ -850,8 +849,8 @@ xfs_buf_get_uncached( error = _xfs_buf_map_pages(bp, XBF_MAPPED); if (unlikely(error)) { - printk(KERN_WARNING "%s: failed to map pages\n", - __func__); + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); goto fail_free_mem; } @@ -1617,8 +1616,8 @@ xfs_setsize_buftarg_flags( btp->bt_smask = sectorsize - 1; if (set_blocksize(btp->bt_bdev, sectorsize)) { - printk(KERN_WARNING - "XFS: Cannot set_blocksize to %u on device %s\n", + xfs_warn(btp->bt_mount, + "Cannot set_blocksize to %u on device %s\n", sectorsize, XFS_BUFTARG_NAME(btp)); return EINVAL; } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 7ec1fb8c131..818c4cf2de8 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -172,6 +172,15 @@ xfs_parseargs( int iosize = 0; __uint8_t iosizelog = 0; + /* + * set up the mount name first so all the errors will refer to the + * correct device. + */ + mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_fsname) + return ENOMEM; + mp->m_fsname_len = strlen(mp->m_fsname) + 1; + /* * Copy binary VFS mount flags we are interested in. */ @@ -208,24 +217,21 @@ xfs_parseargs( if (!strcmp(this_char, MNTOPT_LOGBUFS)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } mp->m_logbufs = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } mp->m_logbsize = suffix_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -233,14 +239,12 @@ xfs_parseargs( if (!mp->m_logname) return ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", + xfs_warn(mp, "%s option not allowed on this system", this_char); return EINVAL; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -249,8 +253,7 @@ xfs_parseargs( return ENOMEM; } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -258,8 +261,7 @@ xfs_parseargs( iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -281,16 +283,14 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_SWALLOC; } else if (!strcmp(this_char, MNTOPT_SUNIT)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } dsunit = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -298,8 +298,7 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; #if !XFS_BIG_INUMS - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", + xfs_warn(mp, "%s option not allowed on this system", this_char); return EINVAL; #endif @@ -357,20 +356,19 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { mp->m_flags &= ~XFS_MOUNT_DELAYLOG; } else if (!strcmp(this_char, "ihashsize")) { - cmn_err(CE_WARN, - "XFS: ihashsize no longer used, option is deprecated."); + xfs_warn(mp, + "ihashsize no longer used, option is deprecated."); } else if (!strcmp(this_char, "osyncisdsync")) { - cmn_err(CE_WARN, - "XFS: osyncisdsync has no effect, option is deprecated."); + xfs_warn(mp, + "osyncisdsync has no effect, option is deprecated."); } else if (!strcmp(this_char, "osyncisosync")) { - cmn_err(CE_WARN, - "XFS: osyncisosync has no effect, option is deprecated."); + xfs_warn(mp, + "osyncisosync has no effect, option is deprecated."); } else if (!strcmp(this_char, "irixsgid")) { - cmn_err(CE_WARN, - "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); + xfs_warn(mp, + "irixsgid is now a sysctl(2) variable, option is deprecated."); } else { - cmn_err(CE_WARN, - "XFS: unknown mount option [%s].", this_char); + xfs_warn(mp, "unknown mount option [%s].", this_char); return EINVAL; } } @@ -380,40 +378,37 @@ xfs_parseargs( */ if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && !(mp->m_flags & XFS_MOUNT_RDONLY)) { - cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); + xfs_warn(mp, "no-recovery mounts must be read-only."); return EINVAL; } if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth options incompatible with the noalign option"); + xfs_warn(mp, + "sunit and swidth options incompatible with the noalign option"); return EINVAL; } #ifndef CONFIG_XFS_QUOTA if (XFS_IS_QUOTA_RUNNING(mp)) { - cmn_err(CE_WARN, - "XFS: quota support not available in this kernel."); + xfs_warn(mp, "quota support not available in this kernel."); return EINVAL; } #endif if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { - cmn_err(CE_WARN, - "XFS: cannot mount with both project and group quota"); + xfs_warn(mp, "cannot mount with both project and group quota"); return EINVAL; } if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth must be specified together"); + xfs_warn(mp, "sunit and swidth must be specified together"); return EINVAL; } if (dsunit && (dswidth % dsunit != 0)) { - cmn_err(CE_WARN, - "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", + xfs_warn(mp, + "stripe width (%d) must be a multiple of the stripe unit (%d)", dswidth, dsunit); return EINVAL; } @@ -439,8 +434,7 @@ done: mp->m_logbufs != 0 && (mp->m_logbufs < XLOG_MIN_ICLOGS || mp->m_logbufs > XLOG_MAX_ICLOGS)) { - cmn_err(CE_WARN, - "XFS: invalid logbufs value: %d [not %d-%d]", + xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); return XFS_ERROR(EINVAL); } @@ -449,22 +443,16 @@ done: (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || !is_power_of_2(mp->m_logbsize))) { - cmn_err(CE_WARN, - "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + xfs_warn(mp, + "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", mp->m_logbsize); return XFS_ERROR(EINVAL); } - mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_fsname) - return ENOMEM; - mp->m_fsname_len = strlen(mp->m_fsname) + 1; - if (iosizelog) { if (iosizelog > XFS_MAX_IO_LOG || iosizelog < XFS_MIN_IO_LOG) { - cmn_err(CE_WARN, - "XFS: invalid log iosize: %d [not %d-%d]", + xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", iosizelog, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); return XFS_ERROR(EINVAL); @@ -611,7 +599,7 @@ xfs_blkdev_get( mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); - printk("XFS: Invalid device [%s], error=%d\n", name, error); + xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); } return -error; @@ -665,23 +653,23 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp) int error; if (mp->m_logdev_targp != mp->m_ddev_targp) { - xfs_fs_cmn_err(CE_NOTE, mp, + xfs_notice(mp, "Disabling barriers, not supported with external log device"); mp->m_flags &= ~XFS_MOUNT_BARRIER; return; } if (xfs_readonly_buftarg(mp->m_ddev_targp)) { - xfs_fs_cmn_err(CE_NOTE, mp, - "Disabling barriers, underlying device is readonly"); + xfs_notice(mp, + "Disabling barriers, underlying device is readonly"); mp->m_flags &= ~XFS_MOUNT_BARRIER; return; } error = xfs_barrier_test(mp); if (error) { - xfs_fs_cmn_err(CE_NOTE, mp, - "Disabling barriers, trial barrier write failed"); + xfs_notice(mp, + "Disabling barriers, trial barrier write failed"); mp->m_flags &= ~XFS_MOUNT_BARRIER; return; } @@ -744,8 +732,8 @@ xfs_open_devices( goto out_close_logdev; if (rtdev == ddev || rtdev == logdev) { - cmn_err(CE_WARN, - "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); + xfs_warn(mp, + "Cannot mount filesystem with identical rtdev and ddev/logdev."); error = EINVAL; goto out_close_rtdev; } @@ -1346,8 +1334,8 @@ xfs_fs_remount( * options that we can't actually change. */ #if 0 - printk(KERN_INFO - "XFS: mount option \"%s\" not supported for remount\n", p); + xfs_info(mp, + "mount option \"%s\" not supported for remount\n", p); return -EINVAL; #else break; @@ -1368,8 +1356,7 @@ xfs_fs_remount( if (mp->m_update_flags) { error = xfs_mount_log_sb(mp, mp->m_update_flags); if (error) { - cmn_err(CE_WARN, - "XFS: failed to write sb changes"); + xfs_warn(mp, "failed to write sb changes"); return error; } mp->m_update_flags = 0; @@ -1453,15 +1440,15 @@ xfs_finish_flags( mp->m_logbsize = mp->m_sb.sb_logsunit; } else if (mp->m_logbsize > 0 && mp->m_logbsize < mp->m_sb.sb_logsunit) { - cmn_err(CE_WARN, - "XFS: logbuf size must be greater than or equal to log stripe size"); + xfs_warn(mp, + "logbuf size must be greater than or equal to log stripe size"); return XFS_ERROR(EINVAL); } } else { /* Fail a mount if the logbuf is larger than 32K */ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { - cmn_err(CE_WARN, - "XFS: logbuf size for version 1 logs must be 16K or 32K"); + xfs_warn(mp, + "logbuf size for version 1 logs must be 16K or 32K"); return XFS_ERROR(EINVAL); } } @@ -1478,8 +1465,8 @@ xfs_finish_flags( * prohibit r/w mounts of read-only filesystems */ if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { - cmn_err(CE_WARN, - "XFS: cannot mount a read-only filesystem as read-write"); + xfs_warn(mp, + "cannot mount a read-only filesystem as read-write"); return XFS_ERROR(EROFS); } diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index e22f0057d21..6c10f1d2e3d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -425,8 +425,7 @@ xfs_quiesce_attr( /* Push the superblock and write an unmount record */ error = xfs_log_sbcount(mp, 1); if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_attr_quiesce: failed to log sb changes. " + xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " "Frozen image may not be consistent."); xfs_log_unmount_write(mp); xfs_unmountfs_writesb(mp); @@ -806,7 +805,7 @@ xfs_reclaim_inode( * pass on the error. */ if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_fs_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "inode 0x%llx background reclaim flush failed with %d", (long long)ip->i_ino, error); } diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index ee3cee097e7..ee2d2adaa43 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -37,7 +37,7 @@ xfs_stats_clear_proc_handler( ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { - printk("XFS Clearing xfsstats\n"); + xfs_notice(NULL, "Clearing xfsstats"); for_each_possible_cpu(c) { preempt_disable(); /* save vn_active, it's a universal truth! */ -- cgit v1.2.3-70-g09d2 From 1bfd8d04190c615bb8d1d98188dead0c09702208 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 26 Mar 2011 09:13:55 +1100 Subject: xfs: introduce inode cluster buffer trylocks for xfs_iflush There is an ABBA deadlock between synchronous inode flushing in xfs_reclaim_inode and xfs_icluster_free. xfs_icluster_free locks the buffer, then takes inode ilocks, whilst synchronous reclaim takes the ilock followed by the buffer lock in xfs_iflush(). To avoid this deadlock, separate the inode cluster buffer locking semantics from the synchronous inode flush semantics, allowing callers to attempt to lock the buffer but still issue synchronous IO if it can get the buffer. This requires xfs_iflush() calls that currently use non-blocking semantics to pass SYNC_TRYLOCK rather than 0 as the flags parameter. This allows xfs_reclaim_inode to avoid the deadlock on the buffer lock and detect the failure so that it can drop the inode ilock and restart the reclaim attempt on the inode. This allows xfs_ifree_cluster to obtain the inode lock, mark the inode stale and release it and hence defuse the deadlock situation. It also has the pleasant side effect of avoiding IO in xfs_reclaim_inode when it tries to next reclaim the inode as it is now marked stale. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 2 +- fs/xfs/linux-2.6/xfs_sync.c | 30 +++++++++++++++++++++++++++--- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_inode_item.c | 6 +++--- 4 files changed, 32 insertions(+), 8 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_super.c') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 818c4cf2de8..8a70b2a17d6 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1078,7 +1078,7 @@ xfs_fs_write_inode( error = 0; goto out_unlock; } - error = xfs_iflush(ip, 0); + error = xfs_iflush(ip, SYNC_TRYLOCK); } out_unlock: diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 6c10f1d2e3d..594cd822d84 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -761,8 +761,10 @@ xfs_reclaim_inode( struct xfs_perag *pag, int sync_mode) { - int error = 0; + int error; +restart: + error = 0; xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iflock_nowait(ip)) { if (!(sync_mode & SYNC_WAIT)) @@ -788,9 +790,31 @@ xfs_reclaim_inode( if (xfs_inode_clean(ip)) goto reclaim; - /* Now we have an inode that needs flushing */ - error = xfs_iflush(ip, sync_mode); + /* + * Now we have an inode that needs flushing. + * + * We do a nonblocking flush here even if we are doing a SYNC_WAIT + * reclaim as we can deadlock with inode cluster removal. + * xfs_ifree_cluster() can lock the inode buffer before it locks the + * ip->i_lock, and we are doing the exact opposite here. As a result, + * doing a blocking xfs_itobp() to get the cluster buffer will result + * in an ABBA deadlock with xfs_ifree_cluster(). + * + * As xfs_ifree_cluser() must gather all inodes that are active in the + * cache to mark them stale, if we hit this case we don't actually want + * to do IO here - we want the inode marked stale so we can simply + * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, + * just unlock the inode, back off and try again. Hopefully the next + * pass through will see the stale flag set on the inode. + */ + error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); if (sync_mode & SYNC_WAIT) { + if (error == EAGAIN) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* backoff longer than in xfs_ifree_cluster */ + delay(2); + goto restart; + } xfs_iflock(ip); goto reclaim; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index da871f53223..742c8330994 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2835,7 +2835,7 @@ xfs_iflush( * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); + (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); if (error || !bp) { xfs_ifunlock(ip); return error; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fd4f398bd6f..46cc40131d4 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -760,11 +760,11 @@ xfs_inode_item_push( * Push the inode to it's backing buffer. This will not remove the * inode from the AIL - a further push will be required to trigger a * buffer push. However, this allows all the dirty inodes to be pushed - * to the buffer before it is pushed to disk. THe buffer IO completion - * will pull th einode from the AIL, mark it clean and unlock the flush + * to the buffer before it is pushed to disk. The buffer IO completion + * will pull the inode from the AIL, mark it clean and unlock the flush * lock. */ - (void) xfs_iflush(ip, 0); + (void) xfs_iflush(ip, SYNC_TRYLOCK); xfs_iunlock(ip, XFS_ILOCK_SHARED); } -- cgit v1.2.3-70-g09d2 From 704b2907c2d47ceb187c0e25a6bbc2174b198f2f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 26 Mar 2011 09:14:57 +1100 Subject: xfs: register the inode cache shrinker before quotachecks During mount, we can do a quotacheck that involves a bulkstat pass on all inodes. If there are more inodes in the filesystem than can be held in memory, we require the inode cache shrinker to run to ensure that we don't run out of memory. Unfortunately, the inode cache shrinker is not registered until we get to the end of the superblock setup process, which is after a quotacheck is run if it is needed. Hence we need to register the inode cache shrinker earlier in the mount process so that we don't OOM during mount. This requires that we also initialise the syncd work before we register the shrinker, so we nee dto juggle that around as well. While there, make sure that we have set up the block sizes in the VFS superblock correctly before the quotacheck is run so that any inodes that are cached as a result of the quotacheck have their block size fields set up correctly. Cc: stable@kernel.org Signed-off-by: Dave Chinner Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_super.c') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8a70b2a17d6..1ba5c451da3 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1539,10 +1539,14 @@ xfs_fs_fill_super( if (error) goto out_free_sb; - error = xfs_mountfs(mp); - if (error) - goto out_filestream_unmount; - + /* + * we must configure the block size in the superblock before we run the + * full mount process as the mount process can lookup and cache inodes. + * For the same reason we must also initialise the syncd and register + * the inode cache shrinker so that inodes can be reclaimed during + * operations like a quotacheck that iterate all inodes in the + * filesystem. + */ sb->s_magic = XFS_SB_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; @@ -1550,6 +1554,16 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); + error = xfs_syncd_init(mp); + if (error) + goto out_filestream_unmount; + + xfs_inode_shrinker_register(mp); + + error = xfs_mountfs(mp); + if (error) + goto out_syncd_stop; + root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; @@ -1565,14 +1579,11 @@ xfs_fs_fill_super( goto fail_vnrele; } - error = xfs_syncd_init(mp); - if (error) - goto fail_vnrele; - - xfs_inode_shrinker_register(mp); - return 0; + out_syncd_stop: + xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: @@ -1596,6 +1607,9 @@ xfs_fs_fill_super( } fail_unmount: + xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); + /* * Blow away any referenced inode in the filestreams cache. * This can and will cause log traffic as inodes go inactive -- cgit v1.2.3-70-g09d2